From ba97e976287fb34121ad76fade029ddf2bd2c6b3 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Thu, 4 Mar 2021 16:24:23 -0500 Subject: [PATCH 01/39] debugging md snapshot --- scripts/conformer_debug/run.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 scripts/conformer_debug/run.py diff --git a/scripts/conformer_debug/run.py b/scripts/conformer_debug/run.py new file mode 100644 index 00000000..dad7378c --- /dev/null +++ b/scripts/conformer_debug/run.py @@ -0,0 +1,83 @@ +import numpy as np +from simtk import openmm, unit +from openmmforcefields.generators import SystemGenerator +from simtk.openmm.app import Simulation +from simtk.unit.quantity import Quantity +from openforcefield.topology import Molecule + +TEMPERATURE = 300 * unit.kelvin +STEP_SIZE = 1.0 * unit.femtosecond +COLLISION_RATE = 1.0 / unit.picosecond + +def run(): + mol = Molecule.from_smiles( + "[H][O][S]1(=[O])=[N][C]2=[C]([C]([N]([H])[H])=[N]1)[N]([H])[C]([c]1[c]([H])[c]([H])[c]([H])[c]([H])[c]1[H])=[N]2", + ) + + # assign partial charge + # mol.assign_partial_charges("am1bcc") + + # parameterize topology + topology = mol.to_topology().to_openmm() + + generator = SystemGenerator( + small_molecule_forcefield="gaff-1.81", + molecules=[mol], + ) + + # create openmm system + system = generator.create_system( + topology, + ) + + # use langevin integrator + integrator = openmm.LangevinIntegrator( + TEMPERATURE, COLLISION_RATE, STEP_SIZE, + ) + + # initialize simulation + simulation = Simulation( + topology=topology, system=system, integrator=integrator, + platform=openmm.Platform.getPlatformByName("Reference"), + ) + + import openforcefield + + # get conformer + mol.generate_conformers( + toolkit_registry=openforcefield.utils.RDKitToolkitWrapper(), + ) + + # put conformer in simulation + simulation.context.setPositions(mol.conformers[0]) + + # set velocities + simulation.context.setVelocitiesToTemperature(TEMPERATURE) + + # initialize empty list for samples. + samples = [] + + # minimize + simulation.minimizeEnergy() + + # loop through number of samples + for _ in range(100): + + # run MD for `self.n_steps_per_sample` steps + simulation.step(1000) + + # append samples to `samples` + samples.append( + simulation.context.getState(getPositions=True) + .getPositions(asNumpy=True) + .value_in_unit(unit.angstrom) + ) + + # put samples into an array + samples = np.array(samples) + + # print out maximum deviation from center of mass + print((samples - samples.mean(axis=(1, 2), keepdims=True)).max()) + +if __name__ == "__main__": + run() From bf630eb957a489c57574efe4fec8ff9f52396ce8 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 10 Mar 2021 14:38:20 -0500 Subject: [PATCH 02/39] subtract nonbonded --- espaloma/data/md.py | 52 ++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/espaloma/data/md.py b/espaloma/data/md.py index ade2638d..94b79a8e 100644 --- a/espaloma/data/md.py +++ b/espaloma/data/md.py @@ -16,30 +16,29 @@ # CONSTANTS # ============================================================================= # simulation specs -TEMPERATURE = 500 * unit.kelvin -STEP_SIZE = 1 * unit.femtosecond -COLLISION_RATE = 1 / unit.picosecond +TEMPERATURE = 350 * unit.kelvin +STEP_SIZE = 1.0 * unit.femtosecond +COLLISION_RATE = 1.0 / unit.picosecond +EPSILON_MIN = 0.05 * unit.kilojoules_per_mole # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def subtract_nonbonded_force( - g, forcefield="test_forcefields/smirnoff99Frosst.offxml", + g, forcefield="gaff-1.81", ): - # get the forcefield from str - if isinstance(forcefield, str): - forcefield = ForceField(forcefield) + # parameterize topology + topology = g.mol.to_topology().to_openmm() - # partial charge - g.mol.assign_partial_charges("gasteiger") # faster - - # parametrize topology - topology = g.mol.to_topology() + generator = SystemGenerator( + small_molecule_forcefield=forcefield, + molecules=[g.mol], + ) # create openmm system - system = forcefield.create_openmm_system( - topology, charge_from_molecules=[g.mol], + system = generator.create_system( + topology, ) # use langevin integrator, although it's not super useful here @@ -197,12 +196,12 @@ def __init__( def simulation_from_graph(self, g): """ Create simulation from moleucle """ # assign partial charge - # g.mol.assign_partial_charges("gasteiger") # faster + # g.mol.assign_partial_charges("am1bcc") # parameterize topology topology = g.mol.to_topology().to_openmm() - generator = SystemGenerator( + generator = systemgenerator( small_molecule_forcefield=self.forcefield, molecules=[g.mol], ) @@ -212,6 +211,14 @@ def simulation_from_graph(self, g): topology, ) + # set epsilon minimum to 0.05 kJ/mol + for force in system.getForces(): + if "Nonbonded" in force.__class__.__name__: + for particle_index in range(force.getNumParticles()): + charge, sigma, epsilon = force.getParticleParameters(particle_index) + if (epsilon < EPSILON_MIN): + force.setParticleParameters(particle_index, charge, sigma, EPSILON_MIN) + # use langevin integrator integrator = openmm.LangevinIntegrator( self.temperature, self.collision_rate, self.step_size @@ -219,7 +226,8 @@ def simulation_from_graph(self, g): # initialize simulation simulation = Simulation( - topology=topology, system=system, integrator=integrator + topology=topology, system=system, integrator=integrator, + platform=openmm.Platform.getPlatformByName("Reference"), ) import openforcefield @@ -232,9 +240,6 @@ def simulation_from_graph(self, g): # put conformer in simulation simulation.context.setPositions(g.mol.conformers[0]) - # minimize energy - simulation.minimizeEnergy() - # set velocities simulation.context.setVelocitiesToTemperature(self.temperature) @@ -267,6 +272,9 @@ def run(self, g, in_place=True): # initialize empty list for samples. samples = [] + # minimize + simulation.minimizeEnergy() + # loop through number of samples for _ in range(self.n_samples): @@ -280,6 +288,10 @@ def run(self, g, in_place=True): .value_in_unit(DISTANCE_UNIT) ) + # assert that energy is below zero + # final_energy = simulation.context.getState(getEnergy=True).getPotentialEnergy( + # ).value_in_unit(ENERGY_UNIT) + # put samples into an array samples = np.array(samples) From a4f50dcd364cf54c90c4717e2082c0a3fbf5f1f6 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Mon, 15 Mar 2021 00:19:36 -0400 Subject: [PATCH 03/39] class-ii energy --- espaloma/mm/energy.py | 81 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 36244526..30304f70 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -44,6 +44,72 @@ def apply_angle(nodes, suffix=""): } +def apply_angle_ii(nodes, suffix=""): + return { + "u_urey_bradley%s" + % suffix: esp.mm.angle.urey_bradley( + x_between=nodes.data["x_between"], + k_urey_bradley=nodes.data["k_urey_bradley"], + eq_urey_bradley=nodes.data["eq_urey_bradley"], + ), + "u_bond_bond%s" + % suffix: esp.mm.angle.bond_bond( + x_left=nodes.data["x_left"], + x_right=nodes.data["x_right"], + eq_left=nodes.data["eq_left"], + eq_right=nodes.data["eq_right"], + k_bond_bond=nodes.data["k_bond_bond"], + ), + "u_bond_angle%s" + % suffix: esp.mm.angle.bond_angle( + x_left=nodes.data["x_left"], + x_right=nodes.data["x_right"], + x_angle=nodes.data["x"], + eq_left=nodes.data["eq_left"], + eq_right=nodes.data["eq_right"], + eq_angle=nodes.data["eq"], + k_bond_angle_left=nodes.data["k_bond_angle_left"], + k_bond_angle_right=nodes.data["k_bond_angle_right"], + ) + } + +def apply_torsion_ii(nodes, suffix=""): + """ Torsion energy in nodes. """ + return { + "u_angle_angle%s" + % suffix: esp.mm.torsion.angle_angle( + x_angle_left=nodes.data["x_angle_left"], + x_angle_right=nodes.data["x_angle_right"], + eq_angle_left=nodes.data["eq_angle_left"], + eq_angle_right=nodes.data["eq_angle_right"], + k_angle_angle=nodes.data["k_angle_angle"], + ), + "u_angle_angle_torsion%s" + % suffix: esp.mm.torsion.angle_angle_torsion( + x_angle_left=nodes.data["x_angle_left"], + x_angle_right=nodes.data["x_angle_right"], + eq_angle_left=nodes.data["eq_angle_left"], + eq_angle_right=nodes.data["eq_angle_right"], + x=nodes.data["x"], + k_angle_angle_torsion=nodes.data["k_angle_angle_torsion"], + ), + "u_bond_torsion%s" + % suffix: esp.mm.torsion.bond_torsion( + x_bond_left=nodes.data["x_bond_left"], + x_bond_center=nodes.data["x_bond_center"], + x_bond_right=nodes.data["x_bond_right"], + x=nodes.data["x"], + k_left_torsion=nodes.data["k_left_torsion"], + k_right_torsion=nodes.data["k_right_torsion"], + eq_left_torsion=nodes.data["eq_left_torsion"], + eq_right_torsion=nodes.data["eq_right_torsion"], + k_center_torsion=nodes.data["k_center_torsion"], + eq_center_torsion=nodes.data["eq_center_torsion"], + ) + } + + + def apply_torsion(nodes, suffix=""): """ Torsion energy in nodes. """ if ( @@ -153,7 +219,7 @@ def apply_nonbonded(nodes, scaling=1.0, suffix=""): # ENERGY IN GRAPH # ============================================================================= def energy_in_graph( - g, suffix="", terms=["n2", "n3", "n4"] + g, suffix="", terms=["n2", "n3", "n4"], ii=False, ): # "onefour", "nonbonded"]): """ Calculate the energy of a small molecule given parameters and geometry. @@ -202,11 +268,22 @@ def energy_in_graph( lambda node: apply_angle(node, suffix=suffix), ntype="n3", ) + if ii is True: + g.apply_nodes( + lambda node: apply_angle_ii(node, suffix=suffix), ntype="n3", + ) + if g.number_of_nodes("n4") > 0 and "n4" in terms: g.apply_nodes( lambda node: apply_torsion(node, suffix=suffix), ntype="n4", ) + + if ii is True: + g.apply_nodes( + lambda node: apply_angle_ii(node, suffix=suffix), ntype="n4", + ) + if g.number_of_nodes("n4_improper") > 0 and "n4_improper" in terms: g.apply_nodes( lambda node: apply_improper_torsion(node, suffix=suffix), @@ -245,7 +322,7 @@ def energy_in_graph( lambda node: { "u%s" % suffix: sum( - node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data + node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data ) }, ntype="g", From a0b72c58b33a3e9f305f1975a52c6b81cbdae51d Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 16 Mar 2021 11:09:14 -0400 Subject: [PATCH 04/39] qm scripts --- scripts/qca_qm/run.py | 244 ++++++++++++++++++++++++++++++++++++ scripts/qca_qm/run.sh | 19 +++ scripts/qca_qm/run_bn.sh | 20 +++ scripts/qca_qm/transform.py | 44 +++++++ scripts/qca_qm/transform.sh | 8 ++ 5 files changed, 335 insertions(+) create mode 100644 scripts/qca_qm/run.py create mode 100644 scripts/qca_qm/run.sh create mode 100644 scripts/qca_qm/run_bn.sh create mode 100644 scripts/qca_qm/transform.py create mode 100644 scripts/qca_qm/transform.sh diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py new file mode 100644 index 00000000..06f011c4 --- /dev/null +++ b/scripts/qca_qm/run.py @@ -0,0 +1,244 @@ +# ============================================================================= +# IMPORTS +# ============================================================================= +import argparse +import os + +import numpy as np +import torch + +import espaloma as esp + +def run(args): + ''' + # define data + ds_tr = esp.data.qca.pfizer() + ds_vl = esp.data.qca.coverage() + print(len(ds_tr)) + print(len(ds_vl)) + print("loaded", flush=True) + + # get force field + forcefield = esp.graphs.legacy_force_field.LegacyForceField( + args.forcefield + ) + + # param / typing + operation = forcefield.parametrize + + # apply to dataset + ds_tr = ds_tr.apply(operation, in_place=True) + ds_vl = ds_vl.apply(operation, in_place=True) + print("parametrized", flush=True) + + # apply simulation + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation( + n_samples=500, n_steps_per_sample=100, + ) + print("simulated", flush=True) + + ds_tr = ds_tr.apply(simulation.run, in_place=True) + ds_vl = ds_vl.apply(simulation.run, in_place=True) + + ds_tr.save('pfizer') + ds_vl.save('coverage') + ''' + + pfizer = esp.data.dataset.GraphDataset.load('pfizer') + coverage = esp.data.dataset.GraphDataset.load('coverage') + emolecules = esp.data.dataset.GraphDataset.load('emolecules') + bayer = esp.data.dataset.GraphDataset.load('bayer') + roche = esp.data.dataset.GraphDataset.load('roche') + fda = esp.data.dataset.GraphDataset.load('fda') + # benchmark = esp.data.dataset.GraphDataset.load("benchmark") + + + _ds_tr = pfizer + coverage + emolecules + bayer + roche + _ds_vl = fda # benchmark + + print(len(_ds_tr)) + print(len(_ds_vl)) + + # batch + ds_tr = _ds_tr.view("graph", batch_size=100, shuffle=False) + ds_vl = _ds_vl.view("graph", batch_size=100) + + g = next(iter(ds_tr)) + esp.mm.geometry.geometry_in_graph(g) + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int) or (isinstance(x, str) and x.isdigit())][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: {'log_coefficients': 2}, + 4: {'k': 6}, + }, + ) + + readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( + in_features=units, config=janossy_config + ) + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + + net = torch.nn.Sequential( + representation, + readout, + readout_improper, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"]), + ) + + + torch.nn.init.normal_( + net[1].f_out_2_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_3_to_log_coefficients.bias, + mean=-5, + ) + + + # net = net.cuda() + metrics_tr = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(torch.nn.MSELoss(reduction='none'), reduction="mean"), + between=['u', "u_ref"], + level="g", + ), + ] + + metrics_te = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(esp.metrics.rmse), + between=['u', "u_ref"], + level="g", + ), + ] + + + optimizer = torch.optim.Adam(net.parameters(), args.lr) + + exp = esp.TrainAndTest( + ds_tr=ds_tr, + ds_te=ds_vl, + net=net, + metrics_tr=metrics_tr, + metrics_te=metrics_te, + n_epochs=args.n_epochs, + normalize=esp.data.normalize.NotNormalize, + record_interval=100, + optimizer=optimizer, + device=torch.device('cuda'), + ) + + results = exp.run() + + print(esp.app.report.markdown(results), flush=True) + + curves = esp.app.report.curve(results) + + import os + os.mkdir(args.out) + for spec, curve in curves.items(): + np.save(args.out + "/" + "_".join(spec) + ".npy", curve) + + net = net.cpu() + + import pandas as pd + df = pd.DataFrame(columns=["SMILES", "RMSE", "n_snapshots"]) + import os + torch.save(net.state_dict(), args.out + "/net.th") + + ''' + for g in _ds_tr: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_tr_%s.csv" % args.first) + + for g in _ds_vl: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_vl_%s.csv" % args.first) + ''' +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--forcefield", default="gaff-1.81", type=str) + parser.add_argument("--layer", default="GraphConv", type=str) + parser.add_argument("--n_classes", default=100, type=int) + parser.add_argument( + "--config", nargs="*", default=[128, "leaky_relu", 128, "leaky_relu", 128, "leaky_relu"], + ) + + parser.add_argument( + "--training_metrics", nargs="*", default=["TypingCrossEntropy"] + ) + parser.add_argument( + "--test_metrics", nargs="*", default=["TypingAccuracy"] + ) + parser.add_argument( + "--out", default="results", type=str + ) + parser.add_argument("--janossy_config", nargs="*", default=[32, "leaky_relu"]) + + parser.add_argument("--graph_act", type=str, default="leaky_relu") + + parser.add_argument("--n_epochs", default=10, type=int) + + parser.add_argument("--weight", default=1.0, type=float) + + parser.add_argument("--lr", default=1e-5, type=float) + parser.add_argument("--batch_size", default=32, type=float) + parser.add_argument("--first", default=32, type=int) + args = parser.parse_args() + + run(args) + diff --git a/scripts/qca_qm/run.sh b/scripts/qca_qm/run.sh new file mode 100644 index 00000000..2c970dc5 --- /dev/null +++ b/scripts/qca_qm/run.sh @@ -0,0 +1,19 @@ +#BSUB -q gpuqueue +#BSUB -o %J.stdout +#BSUB -gpu "num=1:j_exclusive=yes" +#BSUB -R "rusage[mem=30] span[ptile=1]" +#BSUB -W 36:00 +#BSUB -n 1 + +n_epochs=3000 +layer="SAGEConv" +units=128 +act="relu" +lr=1e-4 +graph_act="relu" +weight=1.0 + +python run.py --n_epochs $n_epochs --layer $layer --config $units $act $units $act $units $act --janossy_config $units $act $units $act $units $act --weight $weight --out $units"_"$layer"_"$act"_"$weight"_"$lr"_"$n_epochs"_"$small_batch"_"$big_batch"_single_gpu_janossy_first_distributed" --lr $lr --graph_act $act + + + diff --git a/scripts/qca_qm/run_bn.sh b/scripts/qca_qm/run_bn.sh new file mode 100644 index 00000000..8330a2fc --- /dev/null +++ b/scripts/qca_qm/run_bn.sh @@ -0,0 +1,20 @@ +#BSUB -q gpuqueue +#BSUB -o %J.stdout +#BSUB -gpu "num=1:j_exclusive=yes" +#BSUB -R "rusage[mem=30] span[ptile=1]" +#BSUB -W 8:00 +#BSUB -n 1 + +n_epochs=3000 +layer="SAGEConv" +units=128 +act="relu" +lr=1e-4 +graph_act="relu" +weight=1.0 +first=10 + +python run.py --n_epochs $n_epochs --layer $layer --config $units "bn" $act $units "bn" $act $units "bn" $act --janossy_config $units "bn" $act $units "bn" $act $units "bn" $act --weight $weight --out $units"_"$layer"_"$act"_"$weight"_"$lr"_"$n_epochs"_"$small_batch"_"$big_batch"_single_gpu_janossy_first_distributed"$first"bn" --lr $lr --graph_act $act --first $first + + + diff --git a/scripts/qca_qm/transform.py b/scripts/qca_qm/transform.py new file mode 100644 index 00000000..5bc65cb5 --- /dev/null +++ b/scripts/qca_qm/transform.py @@ -0,0 +1,44 @@ +import espaloma as esp + +def run(in_path, out_path, batch_size=32, u_threshold=0.05): + g = esp.Graph.load(in_path) + from espaloma.data.md import subtract_nonbonded_force + g = subtract_nonbonded_force(g) + + # get number of snapshots + n_data = g.nodes['n1'].data['xyz'].shape[1] + print("n_data: ", n_data) + u_min = g.nodes['g'].data['u_ref'].min().item() + + # original indicies + idxs = list(range(n_data)) + idxs = [idx for idx in idxs if g.nodes['g'].data['u_ref'][:, idx].item() < u_min + u_threshold] + n_data = len(idxs) + + import random + # additional indicies + additional_n_data = batch_size - (n_data % batch_size) + additional_idxs = random.choices(idxs, k=additional_n_data) + idxs += additional_idxs + random.shuffle(idxs) + assert len(idxs) % batch_size == 0 + + + # put data into chunks + import copy + n_chunks = len(idxs) // batch_size + for idx_chunk in range(n_chunks): + print("idx: ", idx_chunk) + chunk_idxs = idxs[idx_chunk*batch_size : (idx_chunk+1)*batch_size] + _g = copy.deepcopy(g) + _g.nodes['n1'].data['xyz'] = g.nodes['n1'].data['xyz'][:, chunk_idxs, :] + _g.nodes['n1'].data['u_ref_prime'] = g.nodes['n1'].data['u_ref_prime'][:, chunk_idxs, :] + _g.nodes['g'].data['u_ref'] = g.nodes['g'].data['u_ref'][:, chunk_idxs] + _g.save(out_path+str(idx_chunk)) + +if __name__ == "__main__": + import sys + run(sys.argv[1], sys.argv[2]) + + + diff --git a/scripts/qca_qm/transform.sh b/scripts/qca_qm/transform.sh new file mode 100644 index 00000000..c4d30b0e --- /dev/null +++ b/scripts/qca_qm/transform.sh @@ -0,0 +1,8 @@ +for data in bayer benchmark zinc emolecules fda pfizer roche coverage +do + echo $data + mkdir $data + let n_molecules=$(ls "_"$data | wc -l) + echo $n_molecules + bsub -J $data"[1-"$n_molecules"]" -o %J.stdout -W 2:00 python transform.py "_"$data/\$LSB_JOBINDEX $data/\$LSB_JOBINDEX +done From e54ee00d1a922df819499053a9dfa80a02f16406 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 16 Mar 2021 11:46:03 -0400 Subject: [PATCH 05/39] class_ii --- espaloma/mm/energy.py | 10 +++--- scripts/qca_qm/run.py | 76 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 30304f70..da3ca3d9 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -68,8 +68,8 @@ def apply_angle_ii(nodes, suffix=""): eq_left=nodes.data["eq_left"], eq_right=nodes.data["eq_right"], eq_angle=nodes.data["eq"], - k_bond_angle_left=nodes.data["k_bond_angle_left"], - k_bond_angle_right=nodes.data["k_bond_angle_right"], + k_bond_angle_left=nodes.data["k_bond_angle"], + k_bond_angle_right=nodes.data["k_bond_angle"], ) } @@ -99,8 +99,8 @@ def apply_torsion_ii(nodes, suffix=""): x_bond_center=nodes.data["x_bond_center"], x_bond_right=nodes.data["x_bond_right"], x=nodes.data["x"], - k_left_torsion=nodes.data["k_left_torsion"], - k_right_torsion=nodes.data["k_right_torsion"], + k_left_torsion=nodes.data["k_side_torsion"], + k_right_torsion=nodes.data["k_side_torsion"], eq_left_torsion=nodes.data["eq_left_torsion"], eq_right_torsion=nodes.data["eq_right_torsion"], k_center_torsion=nodes.data["k_center_torsion"], @@ -197,7 +197,6 @@ def apply_angle_linear_mixture(nodes, suffix="", phases=[0.0, 1.0]): ) } - # ============================================================================= # ENERGY IN HYPERNODES---NONBONDED # ============================================================================= @@ -273,7 +272,6 @@ def energy_in_graph( lambda node: apply_angle_ii(node, suffix=suffix), ntype="n3", ) - if g.number_of_nodes("n4") > 0 and "n4" in terms: g.apply_nodes( lambda node: apply_torsion(node, suffix=suffix), ntype="n4", diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py index 06f011c4..f7241481 100644 --- a/scripts/qca_qm/run.py +++ b/scripts/qca_qm/run.py @@ -6,6 +6,7 @@ import numpy as np import torch +import dgl import espaloma as esp @@ -50,7 +51,7 @@ def run(args): coverage = esp.data.dataset.GraphDataset.load('coverage') emolecules = esp.data.dataset.GraphDataset.load('emolecules') bayer = esp.data.dataset.GraphDataset.load('bayer') - roche = esp.data.dataset.GraphDataset.load('roche') + roche = esp.data.dataset.GraphDataset.load('roche') fda = esp.data.dataset.GraphDataset.load('fda') # benchmark = esp.data.dataset.GraphDataset.load("benchmark") @@ -92,8 +93,21 @@ def run(args): in_features=units, config=janossy_config, out_features={ 2: {'log_coefficients': 2}, - 3: {'log_coefficients': 2}, - 4: {'k': 6}, + 3: { + 'log_coefficients': 2, + 'k_urey_bradley': 1, + 'eq_urey_bradley': 1, + 'k_bond_bond': 1, + 'k_bond_angle': 1, + 'k_bond_angle': 1, + }, + 4: { + 'k': 6, + 'k_angle_angle': 1, + 'k_angle_angle_torsion': 1, + 'k_side_torsion': 1, + 'k_center_torsion': 1, + }, }, ) @@ -107,6 +121,51 @@ def forward(self, g): g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() return g + class CarryII(torch.nn.Module): + def forward(self, g): + _, g.nodes['n2'].data['_eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n2'].data['coefficients'][:, 0][:, None], + g.nodes['n2'].data['coefficients'][:, 1][:, None], + ) + + _, g.nodes['n3'].data['_eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n3'].data['coefficients'][:, 0][:, None], + g.nodes['n3'].data['coefficients'][:, 1][:, None], + ) + + g.multi_update_all( + { + "n2_as_0_in_n3": ( + dgl.function.copy_src("_eq", "m_eq_0"), + dgl.function.sum("m_eq_0", "eq_left"), + ), + "n2_as_1_in_n3": ( + dgl.function.copy_src("_eq", "m_eq_1"), + dgl.function.sum("m_eq_1", "eq_right"), + ), + "n2_as_0_in_n4": ( + dgl.function.copy_src("_eq", "m_eq_0"), + dgl.function.sum("m_eq_0", "eq_left_torsion"), + ), + "n2_as_1_in_n4": ( + dgl.function.copy_src("_eq", "m_eq_1"), + dgl.function.sum("m_eq_1", "eq_center_torsion"), + ), + "n2_as_2_in_n4": ( + dgl.function.copy_src("_eq", "m_eq_2"), + dgl.function.sum("m_eq_2", "eq_right_torsion"), + ), + "n3_as_0_in_n4": ( + dgl.function.copy_src("_eq", "m3_eq_0"), + dgl.function.sum("m3_eq_0", "eq_angle_left"), + ), + "n3_as_1_in_n4": ( + dgl.function.copy_src("_eq", "m3_eq_1"), + dgl.function.sum("m3_eq_1", "eq_angle_right"), + ) + }, + cross_reducer="sum" + ) net = torch.nn.Sequential( representation, @@ -114,7 +173,7 @@ def forward(self, g): readout_improper, ExpCoeff(), esp.mm.geometry.GeometryInGraph(), - esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"]), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"], ii=True), ) @@ -165,7 +224,7 @@ def forward(self, g): results = exp.run() print(esp.app.report.markdown(results), flush=True) - + curves = esp.app.report.curve(results) import os @@ -183,7 +242,7 @@ def forward(self, g): ''' for g in _ds_tr: net(g.heterograph) - + _df = { 'SMILES': g.mol.to_smiles(), 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), @@ -196,7 +255,7 @@ def forward(self, g): for g in _ds_vl: net(g.heterograph) - + _df = { 'SMILES': g.mol.to_smiles(), 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), @@ -229,7 +288,7 @@ def forward(self, g): ) parser.add_argument("--janossy_config", nargs="*", default=[32, "leaky_relu"]) - parser.add_argument("--graph_act", type=str, default="leaky_relu") + parser.add_argument("--graph_act", type=str, default="leaky_relu") parser.add_argument("--n_epochs", default=10, type=int) @@ -241,4 +300,3 @@ def forward(self, g): args = parser.parse_args() run(args) - From d6c609b80d07380d9b8718307ce2e7007641b7b4 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 16 Mar 2021 11:49:06 -0400 Subject: [PATCH 06/39] carry ii --- scripts/qca_qm/run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py index f7241481..257f76c5 100644 --- a/scripts/qca_qm/run.py +++ b/scripts/qca_qm/run.py @@ -167,11 +167,14 @@ def forward(self, g): cross_reducer="sum" ) + return g + net = torch.nn.Sequential( representation, readout, readout_improper, ExpCoeff(), + CarryII(), esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"], ii=True), ) From 3b75aadb7ee64ba6d58e373e385340bd5dece96f Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 16 Mar 2021 14:21:23 -0400 Subject: [PATCH 07/39] graphs --- espaloma/graphs/legacy_force_field.py | 86 ++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/espaloma/graphs/legacy_force_field.py b/espaloma/graphs/legacy_force_field.py index 64592ed6..f9e62cfd 100644 --- a/espaloma/graphs/legacy_force_field.py +++ b/espaloma/graphs/legacy_force_field.py @@ -168,7 +168,7 @@ def _type_gaff(self, g): return g - def _parametrize_gaff(self, g): + def _parametrize_gaff(self, g, n_max_phases=6): from openmmforcefields.generators import SystemGenerator # define a system generator @@ -177,9 +177,8 @@ def _parametrize_gaff(self, g): ) mol = g.mol - print(mol, flush=True) mol.assign_partial_charges("formal_charge") - print(mol, flush=True) + # create system sys = system_generator.create_system( topology=mol.to_topology().to_openmm(), @@ -196,6 +195,37 @@ def _parametrize_gaff(self, g): for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } + torsion_lookup = { + tuple(idxs.detach().numpy()): position + for position, idxs in enumerate(g.nodes["n4"].data["idxs"]) + } + + improper_lookup = { + tuple(idxs.detach().numpy()): position + for position, idxs in enumerate(g.nodes["n4_improper"].data["idxs"]) + } + + torsion_phases = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + torsion_periodicities = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + torsion_ks = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) + + + improper_phases = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + improper_periodicities = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + improper_ks = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) + for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: @@ -218,7 +248,7 @@ def _parametrize_gaff(self, g): g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) - g.nodes["n2"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) @@ -226,7 +256,7 @@ def _parametrize_gaff(self, g): g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) - g.nodes["n2"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) @@ -251,7 +281,7 @@ def _parametrize_gaff(self, g): g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) - g.nodes["n3"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) @@ -259,9 +289,51 @@ def _parametrize_gaff(self, g): g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) - g.nodes["n3"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) + + if "PeriodicTorsionForce" in name: + for idx in range(force.getNumTorsions()): + idx0, idx1, idx2, idx3, periodicity, phase, k = force.getTorsionParameters( + idx + ) + + if (idx0, idx1, idx2, idx3) in torsion_lookup: + position = torsion_lookup[(idx0, idx1, idx2, idx3)] + for sub_idx in range(n_max_phases): + if torsion_ks[position, sub_idx] == 0: + torsion_ks[position, sub_idx] = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT) + torsion_phases[position, sub_idx] = phase.value_in_unit(esp.units.ANGLE_UNIT) + torsion_periodicities[position, sub_idx] = periodicity + + position = torsion_lookup[(idx3, idx2, idx1, idx0)] + torsion_ks[position, sub_idx] = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT) + torsion_phases[position, sub_idx] = phase.value_in_unit(esp.units.ANGLE_UNIT) + torsion_periodicities[position, sub_idx] = periodicity + break + + g.heterograph.apply_nodes( + lambda nodes: { + "k_ref": torsion_ks, + "periodicity_ref": torsion_periodicities, + "phases_ref": torsion_phases, + }, + ntype="n4" + ) + + ''' + g.heterograph.apply_nodes( + lambda nodes: { + "k_ref": improper_ks, + "periodicity_ref": improper_periodicities, + "phases_ref": improper_phases, + }, + ntype="n4_improper" + ) + + ''' + ''' def apply_torsion(node, n_max_phases=6): phases = torch.zeros( From 2a9e22b13e1ded9ff5f515d33f809e9fada19906 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 16 Mar 2021 14:23:01 -0400 Subject: [PATCH 08/39] data md --- espaloma/data/md.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/espaloma/data/md.py b/espaloma/data/md.py index 94b79a8e..f3122589 100644 --- a/espaloma/data/md.py +++ b/espaloma/data/md.py @@ -34,6 +34,7 @@ def subtract_nonbonded_force( generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], + forcefield_kwargs={ 'constraints' : None, 'removeCMMotion' : False}, ) # create openmm system @@ -201,7 +202,7 @@ def simulation_from_graph(self, g): # parameterize topology topology = g.mol.to_topology().to_openmm() - generator = systemgenerator( + generator = SystemGenerator( small_molecule_forcefield=self.forcefield, molecules=[g.mol], ) From 8082cea06d4d82386b63eae3f7dac7d105d44a8a Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 16 Mar 2021 14:47:01 -0400 Subject: [PATCH 09/39] class ii running --- espaloma/mm/angle.py | 35 +++++++ espaloma/mm/energy.py | 2 +- espaloma/mm/functional.py | 57 +++++++++++ espaloma/mm/geometry.py | 39 +++++++- espaloma/mm/tests/test_energy_ii.py | 142 ++++++++++++++++++++++++++++ espaloma/mm/torsion.py | 86 +++++++++++++++++ scripts/qca_qm/run.py | 15 ++- 7 files changed, 365 insertions(+), 11 deletions(-) create mode 100644 espaloma/mm/tests/test_energy_ii.py diff --git a/espaloma/mm/angle.py b/espaloma/mm/angle.py index a86c7e92..2b9f20c4 100644 --- a/espaloma/mm/angle.py +++ b/espaloma/mm/angle.py @@ -38,3 +38,38 @@ def linear_mixture_angle(x, coefficients, phases): """ return esp.mm.functional.linear_mixture(x=x, coefficients=coefficients, phases=phases) + + +def urey_bradley(x_between, k_urey_bradley, eq_urey_bradley): + return esp.mm.functional.harmonic( + x=x_between, + k=k_urey_bradley, + eq=eq_urey_bradley, + ) + +def bond_bond(x_left, x_right, eq_left, eq_right, k_bond_bond): + return esp.mm.functional.harmonic_harmonic_coupled( + x0=x_left, + x1=x_right, + eq0=eq_left, + eq1=eq_right, + k=k_bond_bond, + ) + +def bond_angle( + x_left, x_right, x_angle, eq_left, eq_right, eq_angle, + k_bond_angle_left, k_bond_angle_right, + ): + return esp.mm.functional.harmonic_harmonic_coupled( + x0=x_left, + x1=x_angle, + eq0=eq_left, + eq1=eq_angle, + k=k_bond_angle_left, + ) + esp.mm.functional.harmonic_harmonic_coupled( + x0=x_right, + x1=x_angle, + eq0=eq_right, + eq1=eq_angle, + k=k_bond_angle_right, + ) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index da3ca3d9..74cdf5a0 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -279,7 +279,7 @@ def energy_in_graph( if ii is True: g.apply_nodes( - lambda node: apply_angle_ii(node, suffix=suffix), ntype="n4", + lambda node: apply_torsion_ii(node, suffix=suffix), ntype="n4", ) if g.number_of_nodes("n4_improper") > 0 and "n4_improper" in terms: diff --git a/espaloma/mm/functional.py b/espaloma/mm/functional.py index 3874ff40..771692b6 100644 --- a/espaloma/mm/functional.py +++ b/espaloma/mm/functional.py @@ -285,3 +285,60 @@ def linear_mixture(x, coefficients, phases=[0.0, 1.0]): u = u1 + u2 # - k1 * b1 ** 2 - k2 ** b2 ** 2 + b ** 2 return u + + +def harmonic_periodic_coupled( + x_harmonic, + x_periodic, + k, + eq, + periodicity=list(range(1, 3)), + ): + + if isinstance(periodicity, list): + periodicity = torch.tensor( + periodicity, + device=x_harmonic.device, + dtype=torch.get_default_dtype(), + ) + + n_theta = periodicity[None, None, :].repeat( + x_periodic.shape[0], + x_periodic.shape[1], + 1 + ) * x_periodic[:, :, None] + + cos_n_theta = n_theta.cos() + + k = k[:, None, :].repeat( + 1, x_periodic.shape[1], 1 + ) + + sum_k_cos_n_theta = (k * cos_n_theta).sum(dim=-1) + + x_minus_eq = x_harmonic - eq + + energy = x_minus_eq * sum_k_cos_n_theta + + return energy + +def harmonic_harmonic_coupled( + x0, + x1, + eq0, + eq1, + k, + ): + energy = k * (x0 - eq0) * (x1 - eq1) + return energy + +def harmonic_harmonic_periodic_coupled( + theta0, + theta1, + eq0, + eq1, + phi, + k, + ): + energy = k * (theta0 - eq0) * (theta1 - eq1) * phi.cos() + return energy diff --git a/espaloma/mm/geometry.py b/espaloma/mm/geometry.py index 5dab95b6..b74406f9 100644 --- a/espaloma/mm/geometry.py +++ b/espaloma/mm/geometry.py @@ -103,7 +103,21 @@ def apply_angle(nodes): """ Angle values in nodes. """ return { "x": angle( - x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"] + x0=nodes.data["xyz0"], + x1=nodes.data["xyz1"], + x2=nodes.data["xyz2"], + ), + "x_left": distance( + x0=nodes.data["xyz1"], + x1=nodes.data["xyz0"], + ), + "x_right": distance( + x0=nodes.data["xyz1"], + x1=nodes.data["xyz2"], + ), + "x_between": distance( + x0=nodes.data["xyz0"], + x1=nodes.data["xyz2"], ) } @@ -116,10 +130,33 @@ def apply_torsion(nodes): x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], x3=nodes.data["xyz3"], + ), + "x_bond_left": distance( + x0=nodes.data["xyz0"], + x1=nodes.data["xyz1"], + ), + "x_bond_center": distance( + x0=nodes.data["xyz1"], + x1=nodes.data["xyz2"], + ), + "x_bond_right": distance( + x0=nodes.data["xyz2"], + x1=nodes.data["xyz3"], + ), + "x_angle_left": angle( + x0=nodes.data["xyz0"], + x1=nodes.data["xyz1"], + x2=nodes.data["xyz2"], + ), + "x_angle_right": angle( + x0=nodes.data["xyz1"], + x1=nodes.data["xyz2"], + x2=nodes.data["xyz3"], ) } + # ============================================================================= # GEOMETRY IN GRAPH # ============================================================================= diff --git a/espaloma/mm/tests/test_energy_ii.py b/espaloma/mm/tests/test_energy_ii.py new file mode 100644 index 00000000..4ee926bc --- /dev/null +++ b/espaloma/mm/tests/test_energy_ii.py @@ -0,0 +1,142 @@ +import pytest +import espaloma as esp +import torch +import dgl + +def test_energy(): + g = esp.Graph("c1ccccc1") + + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + + simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) + g = simulation.run(g, in_place=True) + + param = esp.graphs.legacy_force_field.LegacyForceField( + "gaff-1.81" + ).parametrize + + g = param(g) + + # parametrize + + # layer + layer = esp.nn.layers.dgl_legacy.gn() + + # representation + representation = esp.nn.Sequential(layer, config=[32, 'relu', 32, 'relu', 32, 'relu']) + + # get the last bit of units + units = 32 + + janossy_config = [32, 'relu'] + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: { + 'log_coefficients': 2, + 'k_urey_bradley': 1, + 'eq_urey_bradley': 1, + 'k_bond_bond': 1, + 'k_bond_angle': 1, + 'k_bond_angle': 1, + }, + 4: { + 'k': 6, + 'k_angle_angle': 1, + 'k_angle_angle_torsion': 1, + 'k_side_torsion': 1, + 'k_center_torsion': 1, + }, + }, + ) + + readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( + in_features=units, config=janossy_config + ) + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + class CarryII(torch.nn.Module): + def forward(self, g): + import math + _, g.nodes['n2'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n2'].data['coefficients'][:, 0][:, None], + g.nodes['n2'].data['coefficients'][:, 1][:, None], + 1.5, 4.0, + ) + + _, g.nodes['n3'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n3'].data['coefficients'][:, 0][:, None], + g.nodes['n3'].data['coefficients'][:, 1][:, None], + 0.0, math.pi, + ) + + g.multi_update_all( + { + "n2_as_0_in_n3": ( + dgl.function.copy_src("eq", "m_eq_0"), + dgl.function.sum("m_eq_0", "eq_left"), + ), + "n2_as_1_in_n3": ( + dgl.function.copy_src("eq", "m_eq_1"), + dgl.function.sum("m_eq_1", "eq_right"), + ), + "n2_as_0_in_n4": ( + dgl.function.copy_src("eq", "m_eq_0"), + dgl.function.sum("m_eq_0", "eq_left_torsion"), + ), + "n2_as_1_in_n4": ( + dgl.function.copy_src("eq", "m_eq_1"), + dgl.function.sum("m_eq_1", "eq_center_torsion"), + ), + "n2_as_2_in_n4": ( + dgl.function.copy_src("eq", "m_eq_2"), + dgl.function.sum("m_eq_2", "eq_right_torsion"), + ), + "n3_as_0_in_n4": ( + dgl.function.copy_src("eq", "m3_eq_0"), + dgl.function.sum("m3_eq_0", "eq_angle_left"), + ), + "n3_as_1_in_n4": ( + dgl.function.copy_src("eq", "m3_eq_1"), + dgl.function.sum("m3_eq_1", "eq_angle_right"), + ) + }, + cross_reducer="sum" + ) + + return g + + net = torch.nn.Sequential( + representation, + readout, + readout_improper, + ExpCoeff(), + CarryII(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"], ii=True), + ) + + torch.nn.init.normal_(net[1].f_out_2_to_log_coefficients.bias, mean=-5,) + torch.nn.init.normal_(net[1].f_out_3_to_log_coefficients.bias, mean=-5,) + + for name, module in net[1].named_modules(): + if "k" in name: + torch.nn.init.normal(module.bias, mean=0.0, std=1e-4) + torch.nn.init.normal(module.weight, mean=0.0, std=1e-4) + + g = net(g.heterograph) + + print(g.nodes['n3'].data) + print(g.nodes['n4'].data) + + # print(g.nodes['n2'].data) + esp.mm.geometry.geometry_in_graph(g) + esp.mm.energy.energy_in_graph(g) diff --git a/espaloma/mm/torsion.py b/espaloma/mm/torsion.py index 09c7e686..0e8616da 100644 --- a/espaloma/mm/torsion.py +++ b/espaloma/mm/torsion.py @@ -37,3 +37,89 @@ def periodic_torsion( ) # assert(out.shape == (len(x), 1)) return out + + +def angle_angle( + x_angle_left, + x_angle_right, + eq_angle_left, + eq_angle_right, + k_angle_angle, + ): + return esp.mm.functional.harmonic_harmonic_coupled( + x0=x_angle_left, + eq0=eq_angle_left, + x1=x_angle_right, + eq1=eq_angle_right, + k=k_angle_angle, + ) + +def angle_torsion( + x_angle_left, + x_angle_right, + eq_angle_left, + eq_angle_right, + x, + k_angle_torsion_left, + k_angle_torsion_right, + periodicity=list(range(1,3)), + ): + return esp.mm.functional.harmonic_periodic_coupled( + x_harmonic=x_angle_left, + x_periodic=x, + k=k_angle_torsion_left, + eq=eq_angle_left, + periodicity=periodicity, + ) + esp.mm.functional.harmonic_periodic_coupled( + x_harmonic=x_angle_right, + x_periodic=x, + k=k_angle_torsion_right, + eq=eq_angle_right, + periodicity=periodicity, + ) + +def angle_angle_torsion( + x_angle_left, + x_angle_right, + eq_angle_left, + eq_angle_right, + x, + k_angle_angle_torsion, + ): + return esp.mm.functional.harmonic_harmonic_periodic_coupled( + theta0=x_angle_left, + theta1=x_angle_right, + eq0=eq_angle_left, + eq1=eq_angle_right, + phi=x, + k=k_angle_angle_torsion, + ) + +def bond_torsion( + x_bond_left, + x_bond_center, + x_bond_right, + x, + k_left_torsion, + k_center_torsion, + k_right_torsion, + eq_left_torsion, + eq_center_torsion, + eq_right_torsion, + ): + return esp.mm.functional.harmonic_periodic_coupled( + x_harmonic=x_bond_left, + x_periodic=x, + k=k_left_torsion, + eq=eq_left_torsion, + ) + esp.mm.functional.harmonic_periodic_coupled( + x_harmonic=x_bond_center, + x_periodic=x, + k=k_center_torsion, + eq=eq_center_torsion, + ) + esp.mm.functional.harmonic_periodic_coupled( + x_harmonic=x_bond_right, + x_periodic=x, + k=k_right_torsion, + eq=eq_right_torsion, + ) diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py index 257f76c5..58db8ca2 100644 --- a/scripts/qca_qm/run.py +++ b/scripts/qca_qm/run.py @@ -180,16 +180,13 @@ def forward(self, g): ) - torch.nn.init.normal_( - net[1].f_out_2_to_log_coefficients.bias, - mean=-5, - ) - - torch.nn.init.normal_( - net[1].f_out_3_to_log_coefficients.bias, - mean=-5, - ) + torch.nn.init.normal_(net[1].f_out_2_to_log_coefficients.bias, mean=-5,) + torch.nn.init.normal_(net[1].f_out_3_to_log_coefficients.bias, mean=-5,) + for name, module in net[1].named_modules(): + if "k" in name: + torch.nn.init.normal(module.bias, mean=0.0, std=1e-4) + torch.nn.init.normal(module.weight, mean=0.0, std=1e-4) # net = net.cuda() metrics_tr = [ From a9c10b47e4c003177156ce1eeaddffcca76c514a Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 16 Mar 2021 14:49:40 -0400 Subject: [PATCH 10/39] sum up class-ii energy --- espaloma/mm/energy.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 74cdf5a0..8a81af3d 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -272,6 +272,16 @@ def energy_in_graph( lambda node: apply_angle_ii(node, suffix=suffix), ntype="n3", ) + g.apply_nodes( + lambda node: { + 'u%s' % suffix: + node.data['u_urey_bradley%s' % suffix]\ + + node.data['u_bond_bond%s' % suffix]\ + + node.data['u_bond_angle%s' % suffix] + }, + ntype='n3' + ) + if g.number_of_nodes("n4") > 0 and "n4" in terms: g.apply_nodes( lambda node: apply_torsion(node, suffix=suffix), ntype="n4", @@ -281,6 +291,16 @@ def energy_in_graph( g.apply_nodes( lambda node: apply_torsion_ii(node, suffix=suffix), ntype="n4", ) + + g.apply_nodes( + lambda node: { + 'u%s' % suffix: + node.data['u_angle_angle%s' % suffix]\ + + node.data['u_angle_angle_torsion%s' % suffix]\ + + node.data['u_bond_torsion%s' % suffix] + }, + ntype='n4' + ) if g.number_of_nodes("n4_improper") > 0 and "n4_improper" in terms: g.apply_nodes( From 2e29ac050139dd883562f91b1bae123e45a2d824 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 17 Mar 2021 14:47:44 -0400 Subject: [PATCH 11/39] janossy class-ii scripts --- scripts/qca_qm/run.py | 57 +------------------------------------------ scripts/qca_qm/run.sh | 2 +- 2 files changed, 2 insertions(+), 57 deletions(-) diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py index 58db8ca2..395d70ca 100644 --- a/scripts/qca_qm/run.py +++ b/scripts/qca_qm/run.py @@ -120,74 +120,19 @@ def forward(self, g): g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() return g - - class CarryII(torch.nn.Module): - def forward(self, g): - _, g.nodes['n2'].data['_eq'] = esp.mm.functional.linear_mixture_to_original( - g.nodes['n2'].data['coefficients'][:, 0][:, None], - g.nodes['n2'].data['coefficients'][:, 1][:, None], - ) - - _, g.nodes['n3'].data['_eq'] = esp.mm.functional.linear_mixture_to_original( - g.nodes['n3'].data['coefficients'][:, 0][:, None], - g.nodes['n3'].data['coefficients'][:, 1][:, None], - ) - - g.multi_update_all( - { - "n2_as_0_in_n3": ( - dgl.function.copy_src("_eq", "m_eq_0"), - dgl.function.sum("m_eq_0", "eq_left"), - ), - "n2_as_1_in_n3": ( - dgl.function.copy_src("_eq", "m_eq_1"), - dgl.function.sum("m_eq_1", "eq_right"), - ), - "n2_as_0_in_n4": ( - dgl.function.copy_src("_eq", "m_eq_0"), - dgl.function.sum("m_eq_0", "eq_left_torsion"), - ), - "n2_as_1_in_n4": ( - dgl.function.copy_src("_eq", "m_eq_1"), - dgl.function.sum("m_eq_1", "eq_center_torsion"), - ), - "n2_as_2_in_n4": ( - dgl.function.copy_src("_eq", "m_eq_2"), - dgl.function.sum("m_eq_2", "eq_right_torsion"), - ), - "n3_as_0_in_n4": ( - dgl.function.copy_src("_eq", "m3_eq_0"), - dgl.function.sum("m3_eq_0", "eq_angle_left"), - ), - "n3_as_1_in_n4": ( - dgl.function.copy_src("_eq", "m3_eq_1"), - dgl.function.sum("m3_eq_1", "eq_angle_right"), - ) - }, - cross_reducer="sum" - ) - - return g - net = torch.nn.Sequential( representation, readout, readout_improper, ExpCoeff(), - CarryII(), esp.mm.geometry.GeometryInGraph(), - esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"], ii=True), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"], ii=False), ) torch.nn.init.normal_(net[1].f_out_2_to_log_coefficients.bias, mean=-5,) torch.nn.init.normal_(net[1].f_out_3_to_log_coefficients.bias, mean=-5,) - for name, module in net[1].named_modules(): - if "k" in name: - torch.nn.init.normal(module.bias, mean=0.0, std=1e-4) - torch.nn.init.normal(module.weight, mean=0.0, std=1e-4) - # net = net.cuda() metrics_tr = [ esp.metrics.GraphMetric( diff --git a/scripts/qca_qm/run.sh b/scripts/qca_qm/run.sh index 2c970dc5..6130194f 100644 --- a/scripts/qca_qm/run.sh +++ b/scripts/qca_qm/run.sh @@ -5,7 +5,7 @@ #BSUB -W 36:00 #BSUB -n 1 -n_epochs=3000 +n_epochs=10000 layer="SAGEConv" units=128 act="relu" From bf27e27910f68e48c351ae552b14823ff3fee622 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 17 Mar 2021 15:54:28 -0400 Subject: [PATCH 12/39] training scripts --- espaloma/app/experiment.py | 14 ++++++++-- espaloma/data/tests/test_save_and_load.py | 12 ++++++++ .../graphs/tests/test_gaff_parametrize.py | 2 ++ espaloma/mm/tests/test_bond_energy.py | 28 +++++++++++++++++++ espaloma/mm/tests/test_openmm_consistency.py | 8 ++++-- espaloma/nn/sequential.py | 9 ++++-- 6 files changed, 65 insertions(+), 8 deletions(-) create mode 100644 espaloma/data/tests/test_save_and_load.py create mode 100644 espaloma/mm/tests/test_bond_energy.py diff --git a/espaloma/app/experiment.py b/espaloma/app/experiment.py index a62fce30..2157127a 100644 --- a/espaloma/app/experiment.py +++ b/espaloma/app/experiment.py @@ -64,6 +64,7 @@ def __init__( n_epochs=100, record_interval=1, normalize=esp.data.normalize.ESOL100LogNormalNormalize, + scheduler=None, device=torch.device("cpu"), ): super(Train, self).__init__() @@ -82,6 +83,7 @@ def __init__( self.record_interval = record_interval self.normalize = normalize() self.states = {} + self.scheduler = scheduler # make optimizer if callable(optimizer): @@ -111,6 +113,7 @@ def train_once(self): retain_graph=False g = g.to(self.device) + self.net.train() def closure(g=g): self.optimizer.zero_grad() @@ -125,7 +128,11 @@ def closure(g=g): raise RuntimeError("Loss is Nan.") return loss - self.optimizer.step(closure) + loss = closure() + self.optimizer.step() + + if self.scheduler is not None: + self.scheduler.step(loss) def train(self): """ Train the model for multiple steps and @@ -209,7 +216,7 @@ def test(self): # load the state dict self.net.load_state_dict(state) - # local scope + self.net.eval() for metric in self.metrics: assert isinstance(metric, esp.metrics.Metric) @@ -264,6 +271,7 @@ def __init__( n_epochs=100, record_interval=1, device=torch.device("cpu"), + scheduler=None, ): # bookkeeping @@ -278,6 +286,7 @@ def __init__( self.metrics_te = metrics_te self.normalize = normalize self.record_interval = record_interval + self.scheduler = scheduler def __str__(self): _str = "" @@ -313,6 +322,7 @@ def run(self): normalize=self.normalize, device=self.device, record_interval=self.record_interval, + scheduler=self.scheduler, ) train.train() diff --git a/espaloma/data/tests/test_save_and_load.py b/espaloma/data/tests/test_save_and_load.py new file mode 100644 index 00000000..5777b1a3 --- /dev/null +++ b/espaloma/data/tests/test_save_and_load.py @@ -0,0 +1,12 @@ +import pytest + + +def test_save_and_load(): + import espaloma as esp + g = esp.Graph('C') + ds = esp.data.dataset.GraphDataset([g]) + ds.save('ds') + + new_ds = esp.data.dataset.GraphDataset.load('ds') + + os.rmdir('ds') diff --git a/espaloma/graphs/tests/test_gaff_parametrize.py b/espaloma/graphs/tests/test_gaff_parametrize.py index 469afd24..e5472e40 100644 --- a/espaloma/graphs/tests/test_gaff_parametrize.py +++ b/espaloma/graphs/tests/test_gaff_parametrize.py @@ -10,3 +10,5 @@ def test_gaff_parametrize(): print(g.nodes['n2'].data) print(g.nodes['n3'].data) + print(g.nodes['n4'].data) + print(g.nodes['n4_improper'].data) diff --git a/espaloma/mm/tests/test_bond_energy.py b/espaloma/mm/tests/test_bond_energy.py new file mode 100644 index 00000000..b81d0fbb --- /dev/null +++ b/espaloma/mm/tests/test_bond_energy.py @@ -0,0 +1,28 @@ +import pytest + +def test_multiple_conformation(): + import espaloma as esp + + g = esp.Graph('c1ccccc1') + + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation( + n_samples=10, n_steps_per_sample=10 + ) + g = simulation.run(g, in_place=True) + + param = esp.graphs.legacy_force_field.LegacyForceField( + 'smirnoff99Frosst').parametrize + + g = param(g) + + esp.mm.geometry.geometry_in_graph(g.heterograph) + + esp.mm.energy.energy_in_graph(g.heterograph, suffix='_ref') + + + + + + diff --git a/espaloma/mm/tests/test_openmm_consistency.py b/espaloma/mm/tests/test_openmm_consistency.py index ba206100..302c999f 100644 --- a/espaloma/mm/tests/test_openmm_consistency.py +++ b/espaloma/mm/tests/test_openmm_consistency.py @@ -15,7 +15,7 @@ import espaloma as esp -decimal_threshold = 4 +decimal_threshold = 2 def _create_torsion_sim( @@ -95,7 +95,7 @@ def test_energy_angle_and_bond(g): # get simulation simulation = MoleculeVacuumSimulation( - n_samples=1, n_steps_per_sample=10 + n_samples=1, n_steps_per_sample=10, forcefield="gaff-1.81" ).simulation_from_graph(g) system = simulation.system @@ -167,7 +167,7 @@ def test_energy_angle_and_bond(g): energies[name] = energy # parametrize - ff = esp.graphs.legacy_force_field.LegacyForceField("smirnoff99Frosst") + ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = ff.parametrize(g) # n2 : bond, n3: angle, n1: nonbonded? @@ -176,10 +176,12 @@ def test_energy_angle_and_bond(g): g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] g.nodes[term].data["eq"] = g.nodes[term].data["eq_ref"] + ''' for term in ["n1"]: g.nodes[term].data["sigma"] = g.nodes[term].data["sigma_ref"] g.nodes[term].data["epsilon"] = g.nodes[term].data["epsilon_ref"] # g.nodes[term].data['q'] = g.nodes[term].data['q_ref'] + ''' for term in ["n4"]: g.nodes[term].data["phases"] = g.nodes[term].data["phases_ref"] diff --git a/espaloma/nn/sequential.py b/espaloma/nn/sequential.py index ca6599a6..368a8632 100644 --- a/espaloma/nn/sequential.py +++ b/espaloma/nn/sequential.py @@ -39,9 +39,12 @@ def __init__( # str -> activation elif isinstance(exe, str): - activation = getattr(torch.nn.functional, exe) - - setattr(self, "a" + str(idx), activation) + if exe == "bn": + setattr(self, "a" + str(idx), torch.nn.BatchNorm1d(dim)) + + else: + activation = getattr(torch.nn.functional, exe) + setattr(self, "a" + str(idx), activation) self.exes.append("a" + str(idx)) From 1faa9d3fd09da2ce102dde09565482ba865a3a7e Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Wed, 17 Mar 2021 16:09:41 -0400 Subject: [PATCH 13/39] typo correction --- espaloma/data/md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espaloma/data/md.py b/espaloma/data/md.py index 94b79a8e..6ee51d4c 100644 --- a/espaloma/data/md.py +++ b/espaloma/data/md.py @@ -201,7 +201,7 @@ def simulation_from_graph(self, g): # parameterize topology topology = g.mol.to_topology().to_openmm() - generator = systemgenerator( + generator = SystemGenerator( small_molecule_forcefield=self.forcefield, molecules=[g.mol], ) From 48677755aae3b52bf382a0dbac4fc17e25450e68 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Wed, 17 Mar 2021 16:31:18 -0400 Subject: [PATCH 14/39] Update legacy_force_field.py --- espaloma/graphs/legacy_force_field.py | 86 ++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/espaloma/graphs/legacy_force_field.py b/espaloma/graphs/legacy_force_field.py index 64592ed6..f9e62cfd 100644 --- a/espaloma/graphs/legacy_force_field.py +++ b/espaloma/graphs/legacy_force_field.py @@ -168,7 +168,7 @@ def _type_gaff(self, g): return g - def _parametrize_gaff(self, g): + def _parametrize_gaff(self, g, n_max_phases=6): from openmmforcefields.generators import SystemGenerator # define a system generator @@ -177,9 +177,8 @@ def _parametrize_gaff(self, g): ) mol = g.mol - print(mol, flush=True) mol.assign_partial_charges("formal_charge") - print(mol, flush=True) + # create system sys = system_generator.create_system( topology=mol.to_topology().to_openmm(), @@ -196,6 +195,37 @@ def _parametrize_gaff(self, g): for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } + torsion_lookup = { + tuple(idxs.detach().numpy()): position + for position, idxs in enumerate(g.nodes["n4"].data["idxs"]) + } + + improper_lookup = { + tuple(idxs.detach().numpy()): position + for position, idxs in enumerate(g.nodes["n4_improper"].data["idxs"]) + } + + torsion_phases = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + torsion_periodicities = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + torsion_ks = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) + + + improper_phases = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + improper_periodicities = torch.zeros( + g.heterograph.number_of_nodes("n4"), n_max_phases, + ) + + improper_ks = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) + for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: @@ -218,7 +248,7 @@ def _parametrize_gaff(self, g): g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) - g.nodes["n2"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) @@ -226,7 +256,7 @@ def _parametrize_gaff(self, g): g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) - g.nodes["n2"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) @@ -251,7 +281,7 @@ def _parametrize_gaff(self, g): g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) - g.nodes["n3"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) @@ -259,9 +289,51 @@ def _parametrize_gaff(self, g): g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) - g.nodes["n3"].data["k_ref"][position] = (0.5 * k).value_in_unit( + g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) + + if "PeriodicTorsionForce" in name: + for idx in range(force.getNumTorsions()): + idx0, idx1, idx2, idx3, periodicity, phase, k = force.getTorsionParameters( + idx + ) + + if (idx0, idx1, idx2, idx3) in torsion_lookup: + position = torsion_lookup[(idx0, idx1, idx2, idx3)] + for sub_idx in range(n_max_phases): + if torsion_ks[position, sub_idx] == 0: + torsion_ks[position, sub_idx] = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT) + torsion_phases[position, sub_idx] = phase.value_in_unit(esp.units.ANGLE_UNIT) + torsion_periodicities[position, sub_idx] = periodicity + + position = torsion_lookup[(idx3, idx2, idx1, idx0)] + torsion_ks[position, sub_idx] = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT) + torsion_phases[position, sub_idx] = phase.value_in_unit(esp.units.ANGLE_UNIT) + torsion_periodicities[position, sub_idx] = periodicity + break + + g.heterograph.apply_nodes( + lambda nodes: { + "k_ref": torsion_ks, + "periodicity_ref": torsion_periodicities, + "phases_ref": torsion_phases, + }, + ntype="n4" + ) + + ''' + g.heterograph.apply_nodes( + lambda nodes: { + "k_ref": improper_ks, + "periodicity_ref": improper_periodicities, + "phases_ref": improper_phases, + }, + ntype="n4_improper" + ) + + ''' + ''' def apply_torsion(node, n_max_phases=6): phases = torch.zeros( From bec8e14ed23ccbce81bacd719bd882f2e98612ed Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Fri, 19 Mar 2021 23:19:23 -0400 Subject: [PATCH 15/39] varying snapshot scripts --- scripts/mm_sampling_zinc_tune/run.py | 238 +++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 scripts/mm_sampling_zinc_tune/run.py diff --git a/scripts/mm_sampling_zinc_tune/run.py b/scripts/mm_sampling_zinc_tune/run.py new file mode 100644 index 00000000..313abca6 --- /dev/null +++ b/scripts/mm_sampling_zinc_tune/run.py @@ -0,0 +1,238 @@ +# ============================================================================= +# IMPORTS +# ============================================================================= +import argparse +import os + +import numpy as np +import torch + +import espaloma as esp + +def run(args): + ''' + # define data + ds_tr = esp.data.qca.pfizer() + ds_vl = esp.data.qca.coverage() + print(len(ds_tr)) + print(len(ds_vl)) + print("loaded", flush=True) + + # get force field + forcefield = esp.graphs.legacy_force_field.LegacyForceField( + args.forcefield + ) + + # param / typing + operation = forcefield.parametrize + + # apply to dataset + ds_tr = ds_tr.apply(operation, in_place=True) + ds_vl = ds_vl.apply(operation, in_place=True) + print("parametrized", flush=True) + + # apply simulation + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation( + n_samples=500, n_steps_per_sample=100, + ) + print("simulated", flush=True) + + ds_tr = ds_tr.apply(simulation.run, in_place=True) + ds_vl = ds_vl.apply(simulation.run, in_place=True) + + ds_tr.save('pfizer') + ds_vl.save('coverage') + ''' + + ds = esp.data.dataset.GraphDataset.load("zinc") + ds.shuffle(seed=2666) + + def subsample(g, n_samples): + idxs = torch.randperm(g.nodes['n1'].data['xyz'].shape[1])[:n_samples] + g.nodes['n1'].data['xyz'] = g.nodes['n1'].data['xyz'][:, idxs, :] + return g + + import functools + ds.apply(functools.partial(subsample, n_samples=args.n_samples), in_place=True) + + _ds_vl = ds[5000:] + _ds_tr = ds[:args.first] + + # batch + ds_tr = _ds_tr.view("graph", batch_size=100, shuffle=True) + ds_vl = _ds_vl.view("graph", batch_size=100) + + g = next(iter(ds_tr)) + esp.mm.geometry.geometry_in_graph(g) + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int) or (isinstance(x, str) and x.isdigit())][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: {'log_coefficients': 2}, + }, + ) + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + + net = torch.nn.Sequential( + representation, + readout, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3"]), + esp.mm.energy.EnergyInGraph(suffix='_ref', terms=["n2", "n3"]), + ) + + + torch.nn.init.normal_( + net[1].f_out_2_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_3_to_log_coefficients.bias, + mean=-5, + ) + + # net = net.cuda() + metrics_tr = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(torch.nn.MSELoss(reduction='none'), reduction="mean"), + between=['u', "u_ref"], + level="g", + ), + ] + + metrics_te = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(esp.metrics.rmse), + between=['u', "u_ref"], + level="g", + ), + ] + + + optimizer = torch.optim.Adam(net.parameters(), args.lr) + + exp = esp.TrainAndTest( + ds_tr=ds_tr, + ds_te=ds_vl, + net=net, + metrics_tr=metrics_tr, + metrics_te=metrics_te, + n_epochs=args.n_epochs, + normalize=esp.data.normalize.NotNormalize, + record_interval=100, + optimizer=optimizer, + device=torch.device('cuda'), + ) + + results = exp.run() + + print(esp.app.report.markdown(results), flush=True) + + curves = esp.app.report.curve(results) + + import os + os.mkdir(args.out) + for spec, curve in curves.items(): + np.save(args.out + "/" + "_".join(spec) + ".npy", curve) + + net = net.cpu() + + import pandas as pd + df = pd.DataFrame(columns=["SMILES", "RMSE", "n_snapshots"]) + import os + torch.save(net.state_dict(), args.out + "/net.th") + + ''' + for g in _ds_tr: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_tr_%s.csv" % args.first) + + for g in _ds_vl: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_vl_%s.csv" % args.first) + ''' +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--forcefield", default="gaff-1.81", type=str) + parser.add_argument("--layer", default="GraphConv", type=str) + parser.add_argument("--n_classes", default=100, type=int) + parser.add_argument( + "--config", nargs="*", default=[128, "leaky_relu", 128, "leaky_relu", 128, "leaky_relu"], + ) + + parser.add_argument( + "--training_metrics", nargs="*", default=["TypingCrossEntropy"] + ) + parser.add_argument( + "--test_metrics", nargs="*", default=["TypingAccuracy"] + ) + parser.add_argument( + "--out", default="results", type=str + ) + parser.add_argument("--janossy_config", nargs="*", default=[32, "leaky_relu"]) + + parser.add_argument("--graph_act", type=str, default="leaky_relu") + + parser.add_argument("--n_epochs", default=10, type=int) + + parser.add_argument("--weight", default=1.0, type=float) + + parser.add_argument("--lr", default=1e-5, type=float) + parser.add_argument("--batch_size", default=32, type=float) + parser.add_argument("--first", default=32, type=int) + parser.add_argument("--n_samples", default=10, type=int) + args = parser.parse_args() + + run(args) + From 5862a2894f8d40f15999ea3de7dfc48432203d94 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Mon, 29 Mar 2021 10:57:52 -0400 Subject: [PATCH 16/39] coefficient --- espaloma/mm/angle.py | 2 +- espaloma/mm/bond.py | 2 +- espaloma/mm/functional.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/espaloma/mm/angle.py b/espaloma/mm/angle.py index a86c7e92..e73979bb 100644 --- a/espaloma/mm/angle.py +++ b/espaloma/mm/angle.py @@ -30,7 +30,7 @@ def harmonic_angle(x, k, eq): # NOTE: # 0.25 because all angles are calculated twice - return 0.25 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) + return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def linear_mixture_angle(x, coefficients, phases): diff --git a/espaloma/mm/bond.py b/espaloma/mm/bond.py index 8ecc3000..5530c0f0 100644 --- a/espaloma/mm/bond.py +++ b/espaloma/mm/bond.py @@ -30,7 +30,7 @@ def harmonic_bond(x, k, eq): # NOTE: # 0.25 because all bonds are calculated twice - return 0.25 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) + return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def gaussian_bond(x, coefficients): diff --git a/espaloma/mm/functional.py b/espaloma/mm/functional.py index 3874ff40..b58fd8bd 100644 --- a/espaloma/mm/functional.py +++ b/espaloma/mm/functional.py @@ -28,7 +28,6 @@ def linear_mixture_to_original(k1, k2, b1, b2): return k, b - # ============================================================================= # MODULE FUNCTIONS # ============================================================================= @@ -50,7 +49,7 @@ def harmonic(x, k, eq, order=[2]): if isinstance(order, list): order = torch.tensor(order, device=x.device) - return k * ((x - eq)).pow(order[:, None, None]).permute(1, 2, 0).sum( + return 0.5 * k * ((x - eq)).pow(order[:, None, None]).permute(1, 2, 0).sum( dim=-1 ) From 4f95828846e4052580317b66435da850af22eb08 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Mon, 29 Mar 2021 11:12:03 -0400 Subject: [PATCH 17/39] alkethoh --- espaloma/data/alkethoh.smi | 1156 ----------------------------------- espaloma/data/collection.py | 11 +- 2 files changed, 9 insertions(+), 1158 deletions(-) delete mode 100644 espaloma/data/alkethoh.smi diff --git a/espaloma/data/alkethoh.smi b/espaloma/data/alkethoh.smi deleted file mode 100644 index d6d1ef05..00000000 --- a/espaloma/data/alkethoh.smi +++ /dev/null @@ -1,1156 +0,0 @@ -C1C(C(O1)O)O AlkEthOH_r0 -C1C(C1O)O AlkEthOH_r1 -C1C(CC1O)O AlkEthOH_r2 -C1C(CO1)O AlkEthOH_r3 -C1C(COC1O)O AlkEthOH_r4 -C1C(O1)O AlkEthOH_r5 -C1C(OC(O1)O)O AlkEthOH_r6 -C1C(OC1O)O AlkEthOH_r7 -C1C(OCO1)O AlkEthOH_r8 -C1C(OO1)O AlkEthOH_r9 -C1CC(C(C1)O)O AlkEthOH_r10 -C1CC(C(COC1)O)O AlkEthOH_r11 -C1CC(C(OC1)O)O AlkEthOH_r12 -C1CC(C1)O AlkEthOH_r13 -C1CC(C1O)O AlkEthOH_r14 -C1CC(CC(C1)O)O AlkEthOH_r15 -C1CC(CC(COC1)O)O AlkEthOH_r16 -C1CC(CC(OC1)O)O AlkEthOH_r17 -C1CC(CC1O)O AlkEthOH_r18 -C1CC(CCCOC1)O AlkEthOH_r19 -C1CC(CCOC1)O AlkEthOH_r20 -C1CC(COC1)O AlkEthOH_r21 -C1CC(COCOC1)O AlkEthOH_r22 -C1CC(COOC1)O AlkEthOH_r23 -C1CC(OC(C1)O)O AlkEthOH_r24 -C1CC(OC(COC1)O)O AlkEthOH_r25 -C1CC(OC(OC1)O)O AlkEthOH_r26 -C1CC(OC1)O AlkEthOH_r27 -C1CC(OC1O)O AlkEthOH_r28 -C1CC(OCCCOC1)O AlkEthOH_r29 -C1CC(OCCOC1)O AlkEthOH_r30 -C1CC(OCOC1)O AlkEthOH_r31 -C1CC(OOC1)O AlkEthOH_r32 -C1CC1 AlkEthOH_r33 -C1CC1O AlkEthOH_r34 -C1CCC(C(C1)O)O AlkEthOH_r35 -C1CCC(C(CC1)O)O AlkEthOH_r36 -C1CCC(C(OCC1)O)O AlkEthOH_r37 -C1CCC(C1)O AlkEthOH_r38 -C1CCC(CC(C1)O)O AlkEthOH_r39 -C1CCC(CC(CC1)O)O AlkEthOH_r40 -C1CCC(CC(OCC1)O)O AlkEthOH_r41 -C1CCC(CC1)O AlkEthOH_r42 -C1CCC(CCOCC1)O AlkEthOH_r43 -C1CCC(COCC1)O AlkEthOH_r44 -C1CCC(COOCC1)O AlkEthOH_r45 -C1CCC(OC(C1)O)O AlkEthOH_r46 -C1CCC(OC(CC1)O)O AlkEthOH_r47 -C1CCC(OC(OCC1)O)O AlkEthOH_r48 -C1CCC(OCC1)O AlkEthOH_r49 -C1CCC(OOCC1)O AlkEthOH_r50 -C1CCC1 AlkEthOH_r51 -C1CCCC(C(CC1)O)O AlkEthOH_r52 -C1CCCC(CC(CC1)O)O AlkEthOH_r53 -C1CCCC(CC1)O AlkEthOH_r54 -C1CCCC(CCC1)O AlkEthOH_r55 -C1CCCC(OC(CC1)O)O AlkEthOH_r56 -C1CCCC(OCCC1)O AlkEthOH_r57 -C1CCCC1 AlkEthOH_r58 -C1CCCCC(CCC1)O AlkEthOH_r59 -C1CCCCC1 AlkEthOH_r60 -C1CCCCCC1 AlkEthOH_r61 -C1CCCCCCC1 AlkEthOH_r62 -C1CCCCCCCC1 AlkEthOH_r63 -C1CCCCOCCC1 AlkEthOH_r64 -C1CCCOC(CC1)O AlkEthOH_r65 -C1CCCOC(OCC1)O AlkEthOH_r66 -C1CCCOCC(CC1)O AlkEthOH_r67 -C1CCCOCC1 AlkEthOH_r68 -C1CCCOCCC1 AlkEthOH_r69 -C1CCCOCOCC1 AlkEthOH_r70 -C1CCCOOC(CC1)O AlkEthOH_r71 -C1CCCOOCC1 AlkEthOH_r72 -C1CCCOOCCC1 AlkEthOH_r73 -C1CCOC(C(C1)O)O AlkEthOH_r74 -C1CCOC(C(OC1)O)O AlkEthOH_r75 -C1CCOC(C1)O AlkEthOH_r76 -C1CCOC(CC(C1)O)O AlkEthOH_r77 -C1CCOC(CC(OC1)O)O AlkEthOH_r78 -C1CCOC(CCOC1)O AlkEthOH_r79 -C1CCOC(COC1)O AlkEthOH_r80 -C1CCOC(OC(C1)O)O AlkEthOH_r81 -C1CCOC(OC(OC1)O)O AlkEthOH_r82 -C1CCOC(OC1)O AlkEthOH_r83 -C1CCOC(OCC1)O AlkEthOH_r84 -C1CCOC(OOCC1)O AlkEthOH_r85 -C1CCOC1 AlkEthOH_r86 -C1CCOCC(C(C1)O)O AlkEthOH_r87 -C1CCOCC(C1)O AlkEthOH_r88 -C1CCOCC(CC(C1)O)O AlkEthOH_r89 -C1CCOCC(COC1)O AlkEthOH_r90 -C1CCOCC(OC(C1)O)O AlkEthOH_r91 -C1CCOCC(OCC1)O AlkEthOH_r92 -C1CCOCC1 AlkEthOH_r93 -C1CCOCCC(C1)O AlkEthOH_r94 -C1CCOCCCC(C1)O AlkEthOH_r95 -C1CCOCCCOC1 AlkEthOH_r96 -C1CCOCCOC1 AlkEthOH_r97 -C1CCOCCOCC1 AlkEthOH_r98 -C1CCOCOC(C1)O AlkEthOH_r99 -C1CCOCOC1 AlkEthOH_r100 -C1CCOCOCC(C1)O AlkEthOH_r101 -C1CCOCOCC1 AlkEthOH_r102 -C1CCOCOCOC1 AlkEthOH_r103 -C1CCOCOOCC1 AlkEthOH_r104 -C1CCOOC(C1)O AlkEthOH_r105 -C1CCOOC(COC1)O AlkEthOH_r106 -C1CCOOC(OC1)O AlkEthOH_r107 -C1CCOOC1 AlkEthOH_r108 -C1CCOOCC(C1)O AlkEthOH_r109 -C1CCOOCC(OC1)O AlkEthOH_r110 -C1CCOOCC1 AlkEthOH_r111 -C1CCOOCOC1 AlkEthOH_r112 -C1CCOOOC1 AlkEthOH_r113 -C1CCOOOCC1 AlkEthOH_r114 -C1CO1 AlkEthOH_r115 -C1COC(C(CO1)O)O AlkEthOH_r116 -C1COC(C(O1)O)O AlkEthOH_r117 -C1COC(C1O)O AlkEthOH_r118 -C1COC(CC(CO1)O)O AlkEthOH_r119 -C1COC(CC(O1)O)O AlkEthOH_r120 -C1COC(CC1O)O AlkEthOH_r121 -C1COC(CO1)O AlkEthOH_r122 -C1COC(COCO1)O AlkEthOH_r123 -C1COC(O1)O AlkEthOH_r124 -C1COC(OC(CO1)O)O AlkEthOH_r125 -C1COC(OC(O1)O)O AlkEthOH_r126 -C1COC(OC1)O AlkEthOH_r127 -C1COC(OC1O)O AlkEthOH_r128 -C1COC(OCO1)O AlkEthOH_r129 -C1COC(OOC1)O AlkEthOH_r130 -C1COC1 AlkEthOH_r131 -C1COC1O AlkEthOH_r132 -C1COCC(C(OC1)O)O AlkEthOH_r133 -C1COCC(C1O)O AlkEthOH_r134 -C1COCC(CC(OC1)O)O AlkEthOH_r135 -C1COCC(CC1O)O AlkEthOH_r136 -C1COCC(CO1)O AlkEthOH_r137 -C1COCC(COC1)O AlkEthOH_r138 -C1COCC(OC(OC1)O)O AlkEthOH_r139 -C1COCC(OC1)O AlkEthOH_r140 -C1COCC(OC1O)O AlkEthOH_r141 -C1COCC(OOC1)O AlkEthOH_r142 -C1COCC1O AlkEthOH_r143 -C1COCCC(C1O)O AlkEthOH_r144 -C1COCCC(CC1O)O AlkEthOH_r145 -C1COCCC(OC1)O AlkEthOH_r146 -C1COCCC(OC1O)O AlkEthOH_r147 -C1COCCC1O AlkEthOH_r148 -C1COCCCOC1 AlkEthOH_r149 -C1COCCO1 AlkEthOH_r150 -C1COCCOC1 AlkEthOH_r151 -C1COCCOC1O AlkEthOH_r152 -C1COCCOCC1O AlkEthOH_r153 -C1COCCOOC1O AlkEthOH_r154 -C1COCO1 AlkEthOH_r155 -C1COCOC(C1O)O AlkEthOH_r156 -C1COCOC(CC1O)O AlkEthOH_r157 -C1COCOC(CO1)O AlkEthOH_r158 -C1COCOC(OC1)O AlkEthOH_r159 -C1COCOC(OC1O)O AlkEthOH_r160 -C1COCOC1 AlkEthOH_r161 -C1COCOC1O AlkEthOH_r162 -C1COCOCC(OC1)O AlkEthOH_r163 -C1COCOCC1O AlkEthOH_r164 -C1COCOCCO1 AlkEthOH_r165 -C1COCOCO1 AlkEthOH_r166 -C1COCOCOC1 AlkEthOH_r167 -C1COCOOC1 AlkEthOH_r168 -C1COCOOC1O AlkEthOH_r169 -C1COO1 AlkEthOH_r170 -C1COOC(CO1)O AlkEthOH_r171 -C1COOC(O1)O AlkEthOH_r172 -C1COOC1 AlkEthOH_r173 -C1COOC1O AlkEthOH_r174 -C1COOCC(O1)O AlkEthOH_r175 -C1COOCC1O AlkEthOH_r176 -C1COOCCO1 AlkEthOH_r177 -C1COOCO1 AlkEthOH_r178 -C1COOO1 AlkEthOH_r179 -C1COOOC1 AlkEthOH_r180 -C1OC(O1)O AlkEthOH_r181 -C1OCO1 AlkEthOH_r182 -C1OO1 AlkEthOH_r183 -CC1(C(CCCCCO1)(C)O)C AlkEthOH_r184 -CC1(C(CCCCCO1)O)C AlkEthOH_r185 -CC1(C(CCCCCO1)O)O AlkEthOH_r186 -CC1(C(CCCCO1)(C)O)C AlkEthOH_r187 -CC1(C(CCCCO1)O)C AlkEthOH_r188 -CC1(C(CCCCO1)O)O AlkEthOH_r189 -CC1(C(CCCO1)(C)O)C AlkEthOH_r190 -CC1(C(CCCO1)O)C AlkEthOH_r191 -CC1(C(CCCO1)O)O AlkEthOH_r192 -CC1(C(CCO1)(C)O)C AlkEthOH_r193 -CC1(C(CCO1)O)C AlkEthOH_r194 -CC1(C(CCO1)O)O AlkEthOH_r195 -CC1(C(CCOCO1)(C)O)C AlkEthOH_r196 -CC1(C(CCOCO1)O)C AlkEthOH_r197 -CC1(C(CCOCO1)O)O AlkEthOH_r198 -CC1(C(CO1)(C)O)C AlkEthOH_r199 -CC1(C(CO1)O)C AlkEthOH_r200 -CC1(C(CO1)O)O AlkEthOH_r201 -CC1(C(COCCCO1)(C)O)C AlkEthOH_r202 -CC1(C(COCCCO1)O)C AlkEthOH_r203 -CC1(C(COCCCO1)O)O AlkEthOH_r204 -CC1(C(COCCO1)(C)O)C AlkEthOH_r205 -CC1(C(COCCO1)O)C AlkEthOH_r206 -CC1(C(COCCO1)O)O AlkEthOH_r207 -CC1(C(OCCCCO1)(C)C)C AlkEthOH_r208 -CC1(C(OCCCCO1)(C)O)C AlkEthOH_r209 -CC1(C(OCCCCO1)(C)O)O AlkEthOH_r210 -CC1(C(OCCCCO1)O)C AlkEthOH_r211 -CC1(C(OCCCCO1)O)O AlkEthOH_r212 -CC1(C(OCCO1)(C)C)C AlkEthOH_r213 -CC1(C(OCCO1)(C)O)C AlkEthOH_r214 -CC1(C(OCCO1)(C)O)O AlkEthOH_r215 -CC1(C(OCCO1)O)C AlkEthOH_r216 -CC1(C(OCCO1)O)O AlkEthOH_r217 -CC1(CC(C1)(C)C)C AlkEthOH_r218 -CC1(CC(C1)(C)O)C AlkEthOH_r219 -CC1(CC(C1)(C)O)O AlkEthOH_r220 -CC1(CC(C1)O)C AlkEthOH_r221 -CC1(CC(C1)O)O AlkEthOH_r222 -CC1(CC(CCCCCO1)(C)O)C AlkEthOH_r223 -CC1(CC(CCCCCO1)O)C AlkEthOH_r224 -CC1(CC(CCCCCO1)O)O AlkEthOH_r225 -CC1(CC(CCCCO1)(C)O)C AlkEthOH_r226 -CC1(CC(CCCCO1)O)C AlkEthOH_r227 -CC1(CC(CCCCO1)O)O AlkEthOH_r228 -CC1(CC(CCCCOC1)(C)O)C AlkEthOH_r229 -CC1(CC(CCCCOC1)O)C AlkEthOH_r230 -CC1(CC(CCCCOC1)O)O AlkEthOH_r231 -CC1(CC(CCCO1)(C)O)C AlkEthOH_r232 -CC1(CC(CCCO1)O)C AlkEthOH_r233 -CC1(CC(CCCO1)O)O AlkEthOH_r234 -CC1(CC(CCCOC1)(C)O)C AlkEthOH_r235 -CC1(CC(CCCOC1)O)C AlkEthOH_r236 -CC1(CC(CCCOC1)O)O AlkEthOH_r237 -CC1(CC(CCO1)(C)O)C AlkEthOH_r238 -CC1(CC(CCO1)O)C AlkEthOH_r239 -CC1(CC(CCO1)O)O AlkEthOH_r240 -CC1(CC(CCOC1)(C)O)C AlkEthOH_r241 -CC1(CC(CCOC1)O)C AlkEthOH_r242 -CC1(CC(CCOC1)O)O AlkEthOH_r243 -CC1(CC(CCOCO1)(C)O)C AlkEthOH_r244 -CC1(CC(CCOCO1)O)C AlkEthOH_r245 -CC1(CC(CCOCO1)O)O AlkEthOH_r246 -CC1(CC(CO1)(C)O)C AlkEthOH_r247 -CC1(CC(CO1)O)C AlkEthOH_r248 -CC1(CC(CO1)O)O AlkEthOH_r249 -CC1(CC(COCCCO1)(C)O)C AlkEthOH_r250 -CC1(CC(COCCCO1)O)C AlkEthOH_r251 -CC1(CC(COCCCO1)O)O AlkEthOH_r252 -CC1(CC(COCCO1)(C)O)C AlkEthOH_r253 -CC1(CC(COCCO1)O)C AlkEthOH_r254 -CC1(CC(COCCO1)O)O AlkEthOH_r255 -CC1(CC(O1)(C)C)C AlkEthOH_r256 -CC1(CC(O1)(C)O)C AlkEthOH_r257 -CC1(CC(O1)(C)O)O AlkEthOH_r258 -CC1(CC(O1)O)C AlkEthOH_r259 -CC1(CC(O1)O)O AlkEthOH_r260 -CC1(CC(OC1)(C)C)C AlkEthOH_r261 -CC1(CC(OC1)(C)O)C AlkEthOH_r262 -CC1(CC(OC1)(C)O)O AlkEthOH_r263 -CC1(CC(OC1)O)C AlkEthOH_r264 -CC1(CC(OC1)O)O AlkEthOH_r265 -CC1(CC(OCCCCO1)(C)C)C AlkEthOH_r266 -CC1(CC(OCCCCO1)(C)O)C AlkEthOH_r267 -CC1(CC(OCCCCO1)(C)O)O AlkEthOH_r268 -CC1(CC(OCCCCO1)O)C AlkEthOH_r269 -CC1(CC(OCCCCO1)O)O AlkEthOH_r270 -CC1(CC(OCCCOC1)(C)C)C AlkEthOH_r271 -CC1(CC(OCCCOC1)(C)O)C AlkEthOH_r272 -CC1(CC(OCCCOC1)(C)O)O AlkEthOH_r273 -CC1(CC(OCCCOC1)O)C AlkEthOH_r274 -CC1(CC(OCCCOC1)O)O AlkEthOH_r275 -CC1(CC(OCCO1)(C)C)C AlkEthOH_r276 -CC1(CC(OCCO1)(C)O)C AlkEthOH_r277 -CC1(CC(OCCO1)(C)O)O AlkEthOH_r278 -CC1(CC(OCCO1)O)C AlkEthOH_r279 -CC1(CC(OCCO1)O)O AlkEthOH_r280 -CC1(CC(OCCOC1)(C)C)C AlkEthOH_r281 -CC1(CC(OCCOC1)(C)O)C AlkEthOH_r282 -CC1(CC(OCCOC1)(C)O)O AlkEthOH_r283 -CC1(CC(OCCOC1)O)C AlkEthOH_r284 -CC1(CC(OCCOC1)O)O AlkEthOH_r285 -CC1(CC1(C)C)C AlkEthOH_r286 -CC1(CC1(C)O)C AlkEthOH_r287 -CC1(CC1(C)O)O AlkEthOH_r288 -CC1(CC1)C AlkEthOH_r289 -CC1(CC1)O AlkEthOH_r290 -CC1(CC1O)C AlkEthOH_r291 -CC1(CC1O)O AlkEthOH_r292 -CC1(CCC(C1)(C)C)C AlkEthOH_r293 -CC1(CCC(C1)(C)O)C AlkEthOH_r294 -CC1(CCC(C1)(C)O)O AlkEthOH_r295 -CC1(CCC(C1)O)C AlkEthOH_r296 -CC1(CCC(C1)O)O AlkEthOH_r297 -CC1(CCC(O1)(C)C)C AlkEthOH_r298 -CC1(CCC(O1)(C)O)C AlkEthOH_r299 -CC1(CCC(O1)(C)O)O AlkEthOH_r300 -CC1(CCC(O1)O)C AlkEthOH_r301 -CC1(CCC(O1)O)O AlkEthOH_r302 -CC1(CCC1(C)C)C AlkEthOH_r303 -CC1(CCC1(C)O)C AlkEthOH_r304 -CC1(CCC1(C)O)O AlkEthOH_r305 -CC1(CCC1)C AlkEthOH_r306 -CC1(CCC1)O AlkEthOH_r307 -CC1(CCC1O)C AlkEthOH_r308 -CC1(CCC1O)O AlkEthOH_r309 -CC1(CCCC(C1)(C)C)C AlkEthOH_r310 -CC1(CCCC(C1)(C)O)C AlkEthOH_r311 -CC1(CCCC(C1)(C)O)O AlkEthOH_r312 -CC1(CCCC(C1)O)C AlkEthOH_r313 -CC1(CCCC(C1)O)O AlkEthOH_r314 -CC1(CCCC(O1)(C)C)C AlkEthOH_r315 -CC1(CCCC(O1)(C)O)C AlkEthOH_r316 -CC1(CCCC(O1)(C)O)O AlkEthOH_r317 -CC1(CCCC(O1)O)C AlkEthOH_r318 -CC1(CCCC(O1)O)O AlkEthOH_r319 -CC1(CCCC1(C)C)C AlkEthOH_r320 -CC1(CCCC1(C)O)C AlkEthOH_r321 -CC1(CCCC1(C)O)O AlkEthOH_r322 -CC1(CCCC1)C AlkEthOH_r323 -CC1(CCCC1)O AlkEthOH_r324 -CC1(CCCC1O)C AlkEthOH_r325 -CC1(CCCC1O)O AlkEthOH_r326 -CC1(CCCCC(C1)(C)C)C AlkEthOH_r327 -CC1(CCCCC(C1)(C)O)C AlkEthOH_r328 -CC1(CCCCC(C1)(C)O)O AlkEthOH_r329 -CC1(CCCCC(C1)O)C AlkEthOH_r330 -CC1(CCCCC(C1)O)O AlkEthOH_r331 -CC1(CCCCC(O1)(C)C)C AlkEthOH_r332 -CC1(CCCCC(O1)(C)O)C AlkEthOH_r333 -CC1(CCCCC(O1)(C)O)O AlkEthOH_r334 -CC1(CCCCC(O1)O)C AlkEthOH_r335 -CC1(CCCCC(O1)O)O AlkEthOH_r336 -CC1(CCCCC1(C)C)C AlkEthOH_r337 -CC1(CCCCC1(C)O)C AlkEthOH_r338 -CC1(CCCCC1(C)O)O AlkEthOH_r339 -CC1(CCCCC1)C AlkEthOH_r340 -CC1(CCCCC1)O AlkEthOH_r341 -CC1(CCCCC1O)C AlkEthOH_r342 -CC1(CCCCC1O)O AlkEthOH_r343 -CC1(CCCCCC(C1)(C)C)C AlkEthOH_r344 -CC1(CCCCCC(C1)(C)O)C AlkEthOH_r345 -CC1(CCCCCC(C1)(C)O)O AlkEthOH_r346 -CC1(CCCCCC(C1)O)C AlkEthOH_r347 -CC1(CCCCCC(C1)O)O AlkEthOH_r348 -CC1(CCCCCC(O1)(C)C)C AlkEthOH_r349 -CC1(CCCCCC(O1)(C)O)C AlkEthOH_r350 -CC1(CCCCCC(O1)(C)O)O AlkEthOH_r351 -CC1(CCCCCC(O1)O)C AlkEthOH_r352 -CC1(CCCCCC(O1)O)O AlkEthOH_r353 -CC1(CCCCCC1(C)C)C AlkEthOH_r354 -CC1(CCCCCC1(C)O)C AlkEthOH_r355 -CC1(CCCCCC1(C)O)O AlkEthOH_r356 -CC1(CCCCCC1)C AlkEthOH_r357 -CC1(CCCCCC1)O AlkEthOH_r358 -CC1(CCCCCC1O)C AlkEthOH_r359 -CC1(CCCCCC1O)O AlkEthOH_r360 -CC1(CCCCCCC(C1)(C)C)C AlkEthOH_r361 -CC1(CCCCCCC(C1)(C)O)C AlkEthOH_r362 -CC1(CCCCCCC(C1)(C)O)O AlkEthOH_r363 -CC1(CCCCCCC(C1)O)C AlkEthOH_r364 -CC1(CCCCCCC(C1)O)O AlkEthOH_r365 -CC1(CCCCCCC(O1)(C)C)C AlkEthOH_r366 -CC1(CCCCCCC(O1)(C)O)C AlkEthOH_r367 -CC1(CCCCCCC(O1)(C)O)O AlkEthOH_r368 -CC1(CCCCCCC(O1)O)C AlkEthOH_r369 -CC1(CCCCCCC(O1)O)O AlkEthOH_r370 -CC1(CCCCCCC1(C)C)C AlkEthOH_r371 -CC1(CCCCCCC1(C)O)C AlkEthOH_r372 -CC1(CCCCCCC1(C)O)O AlkEthOH_r373 -CC1(CCCCCCC1)C AlkEthOH_r374 -CC1(CCCCCCC1)O AlkEthOH_r375 -CC1(CCCCCCC1O)C AlkEthOH_r376 -CC1(CCCCCCC1O)O AlkEthOH_r377 -CC1(CCCCCCCC1)C AlkEthOH_r378 -CC1(CCCCCCCC1)O AlkEthOH_r379 -CC1(CCCCCCCO1)C AlkEthOH_r380 -CC1(CCCCCCCO1)O AlkEthOH_r381 -CC1(CCCCCCO1)C AlkEthOH_r382 -CC1(CCCCCCO1)O AlkEthOH_r383 -CC1(CCCCCCOC1)C AlkEthOH_r384 -CC1(CCCCCCOC1)O AlkEthOH_r385 -CC1(CCCCCCOO1)C AlkEthOH_r386 -CC1(CCCCCCOO1)O AlkEthOH_r387 -CC1(CCCCCO1)C AlkEthOH_r388 -CC1(CCCCCO1)O AlkEthOH_r389 -CC1(CCCCCOC(C1)(C)C)C AlkEthOH_r390 -CC1(CCCCCOC(C1)(C)O)C AlkEthOH_r391 -CC1(CCCCCOC(C1)(C)O)O AlkEthOH_r392 -CC1(CCCCCOC(C1)O)C AlkEthOH_r393 -CC1(CCCCCOC(C1)O)O AlkEthOH_r394 -CC1(CCCCCOC(O1)(C)C)C AlkEthOH_r395 -CC1(CCCCCOC(O1)(C)O)C AlkEthOH_r396 -CC1(CCCCCOC(O1)(C)O)O AlkEthOH_r397 -CC1(CCCCCOC(O1)O)C AlkEthOH_r398 -CC1(CCCCCOC(O1)O)O AlkEthOH_r399 -CC1(CCCCCOC1(C)C)C AlkEthOH_r400 -CC1(CCCCCOC1(C)O)C AlkEthOH_r401 -CC1(CCCCCOC1(C)O)O AlkEthOH_r402 -CC1(CCCCCOC1)C AlkEthOH_r403 -CC1(CCCCCOC1)O AlkEthOH_r404 -CC1(CCCCCOC1O)C AlkEthOH_r405 -CC1(CCCCCOC1O)O AlkEthOH_r406 -CC1(CCCCCOCC1)C AlkEthOH_r407 -CC1(CCCCCOCC1)O AlkEthOH_r408 -CC1(CCCCCOO1)C AlkEthOH_r409 -CC1(CCCCCOO1)O AlkEthOH_r410 -CC1(CCCCCOOC1)C AlkEthOH_r411 -CC1(CCCCCOOC1)O AlkEthOH_r412 -CC1(CCCCO1)C AlkEthOH_r413 -CC1(CCCCO1)O AlkEthOH_r414 -CC1(CCCCOC(C1)(C)C)C AlkEthOH_r415 -CC1(CCCCOC(C1)(C)O)C AlkEthOH_r416 -CC1(CCCCOC(C1)(C)O)O AlkEthOH_r417 -CC1(CCCCOC(C1)O)C AlkEthOH_r418 -CC1(CCCCOC(C1)O)O AlkEthOH_r419 -CC1(CCCCOC(O1)(C)C)C AlkEthOH_r420 -CC1(CCCCOC(O1)(C)O)C AlkEthOH_r421 -CC1(CCCCOC(O1)(C)O)O AlkEthOH_r422 -CC1(CCCCOC(O1)O)C AlkEthOH_r423 -CC1(CCCCOC(O1)O)O AlkEthOH_r424 -CC1(CCCCOC1(C)C)C AlkEthOH_r425 -CC1(CCCCOC1(C)O)C AlkEthOH_r426 -CC1(CCCCOC1(C)O)O AlkEthOH_r427 -CC1(CCCCOC1)C AlkEthOH_r428 -CC1(CCCCOC1)O AlkEthOH_r429 -CC1(CCCCOC1O)C AlkEthOH_r430 -CC1(CCCCOC1O)O AlkEthOH_r431 -CC1(CCCCOCC(C1)(C)C)C AlkEthOH_r432 -CC1(CCCCOCC(C1)(C)O)C AlkEthOH_r433 -CC1(CCCCOCC(C1)(C)O)O AlkEthOH_r434 -CC1(CCCCOCC(C1)O)C AlkEthOH_r435 -CC1(CCCCOCC(C1)O)O AlkEthOH_r436 -CC1(CCCCOCC(O1)(C)C)C AlkEthOH_r437 -CC1(CCCCOCC(O1)(C)O)C AlkEthOH_r438 -CC1(CCCCOCC(O1)(C)O)O AlkEthOH_r439 -CC1(CCCCOCC(O1)O)C AlkEthOH_r440 -CC1(CCCCOCC(O1)O)O AlkEthOH_r441 -CC1(CCCCOCC1(C)C)C AlkEthOH_r442 -CC1(CCCCOCC1(C)O)C AlkEthOH_r443 -CC1(CCCCOCC1(C)O)O AlkEthOH_r444 -CC1(CCCCOCC1)C AlkEthOH_r445 -CC1(CCCCOCC1)O AlkEthOH_r446 -CC1(CCCCOCC1O)C AlkEthOH_r447 -CC1(CCCCOCC1O)O AlkEthOH_r448 -CC1(CCCCOCCC1)C AlkEthOH_r449 -CC1(CCCCOCCC1)O AlkEthOH_r450 -CC1(CCCCOCO1)C AlkEthOH_r451 -CC1(CCCCOCO1)O AlkEthOH_r452 -CC1(CCCCOCOC1)C AlkEthOH_r453 -CC1(CCCCOCOC1)O AlkEthOH_r454 -CC1(CCCCOO1)C AlkEthOH_r455 -CC1(CCCCOO1)O AlkEthOH_r456 -CC1(CCCCOOC1)C AlkEthOH_r457 -CC1(CCCCOOC1)O AlkEthOH_r458 -CC1(CCCO1)C AlkEthOH_r459 -CC1(CCCO1)O AlkEthOH_r460 -CC1(CCCOC(C1)(C)C)C AlkEthOH_r461 -CC1(CCCOC(C1)(C)O)C AlkEthOH_r462 -CC1(CCCOC(C1)(C)O)O AlkEthOH_r463 -CC1(CCCOC(C1)O)C AlkEthOH_r464 -CC1(CCCOC(C1)O)O AlkEthOH_r465 -CC1(CCCOC(O1)(C)C)C AlkEthOH_r466 -CC1(CCCOC(O1)(C)O)C AlkEthOH_r467 -CC1(CCCOC(O1)(C)O)O AlkEthOH_r468 -CC1(CCCOC(O1)O)C AlkEthOH_r469 -CC1(CCCOC(O1)O)O AlkEthOH_r470 -CC1(CCCOC1(C)C)C AlkEthOH_r471 -CC1(CCCOC1(C)O)C AlkEthOH_r472 -CC1(CCCOC1(C)O)O AlkEthOH_r473 -CC1(CCCOC1)C AlkEthOH_r474 -CC1(CCCOC1)O AlkEthOH_r475 -CC1(CCCOC1O)C AlkEthOH_r476 -CC1(CCCOC1O)O AlkEthOH_r477 -CC1(CCCOCC(C1)(C)C)C AlkEthOH_r478 -CC1(CCCOCC(C1)(C)O)C AlkEthOH_r479 -CC1(CCCOCC(C1)(C)O)O AlkEthOH_r480 -CC1(CCCOCC(C1)O)C AlkEthOH_r481 -CC1(CCCOCC(C1)O)O AlkEthOH_r482 -CC1(CCCOCC(O1)(C)C)C AlkEthOH_r483 -CC1(CCCOCC(O1)(C)O)C AlkEthOH_r484 -CC1(CCCOCC(O1)(C)O)O AlkEthOH_r485 -CC1(CCCOCC(O1)O)C AlkEthOH_r486 -CC1(CCCOCC(O1)O)O AlkEthOH_r487 -CC1(CCCOCC1(C)C)C AlkEthOH_r488 -CC1(CCCOCC1(C)O)C AlkEthOH_r489 -CC1(CCCOCC1(C)O)O AlkEthOH_r490 -CC1(CCCOCC1)C AlkEthOH_r491 -CC1(CCCOCC1)O AlkEthOH_r492 -CC1(CCCOCC1O)C AlkEthOH_r493 -CC1(CCCOCC1O)O AlkEthOH_r494 -CC1(CCCOCCC1)C AlkEthOH_r495 -CC1(CCCOCCC1)O AlkEthOH_r496 -CC1(CCCOCCCO1)C AlkEthOH_r497 -CC1(CCCOCCCO1)O AlkEthOH_r498 -CC1(CCCOCCO1)C AlkEthOH_r499 -CC1(CCCOCCO1)O AlkEthOH_r500 -CC1(CCCOCO1)C AlkEthOH_r501 -CC1(CCCOCO1)O AlkEthOH_r502 -CC1(CCCOCOC1)C AlkEthOH_r503 -CC1(CCCOCOC1)O AlkEthOH_r504 -CC1(CCCOO1)C AlkEthOH_r505 -CC1(CCCOO1)O AlkEthOH_r506 -CC1(CCCOOC1)C AlkEthOH_r507 -CC1(CCCOOC1)O AlkEthOH_r508 -CC1(CCO1)C AlkEthOH_r509 -CC1(CCO1)O AlkEthOH_r510 -CC1(CCOC(C1)(C)C)C AlkEthOH_r511 -CC1(CCOC(C1)(C)O)C AlkEthOH_r512 -CC1(CCOC(C1)(C)O)O AlkEthOH_r513 -CC1(CCOC(C1)O)C AlkEthOH_r514 -CC1(CCOC(C1)O)O AlkEthOH_r515 -CC1(CCOC(O1)(C)C)C AlkEthOH_r516 -CC1(CCOC(O1)(C)O)C AlkEthOH_r517 -CC1(CCOC(O1)(C)O)O AlkEthOH_r518 -CC1(CCOC(O1)O)C AlkEthOH_r519 -CC1(CCOC(O1)O)O AlkEthOH_r520 -CC1(CCOC1(C)C)C AlkEthOH_r521 -CC1(CCOC1(C)O)C AlkEthOH_r522 -CC1(CCOC1(C)O)O AlkEthOH_r523 -CC1(CCOC1)C AlkEthOH_r524 -CC1(CCOC1)O AlkEthOH_r525 -CC1(CCOC1O)C AlkEthOH_r526 -CC1(CCOC1O)O AlkEthOH_r527 -CC1(CCOCC(C1)(C)C)C AlkEthOH_r528 -CC1(CCOCC(C1)(C)O)C AlkEthOH_r529 -CC1(CCOCC(C1)(C)O)O AlkEthOH_r530 -CC1(CCOCC(C1)O)C AlkEthOH_r531 -CC1(CCOCC(C1)O)O AlkEthOH_r532 -CC1(CCOCC(O1)(C)C)C AlkEthOH_r533 -CC1(CCOCC(O1)(C)O)C AlkEthOH_r534 -CC1(CCOCC(O1)(C)O)O AlkEthOH_r535 -CC1(CCOCC(O1)O)C AlkEthOH_r536 -CC1(CCOCC(O1)O)O AlkEthOH_r537 -CC1(CCOCC1(C)C)C AlkEthOH_r538 -CC1(CCOCC1(C)O)C AlkEthOH_r539 -CC1(CCOCC1(C)O)O AlkEthOH_r540 -CC1(CCOCC1)C AlkEthOH_r541 -CC1(CCOCC1)O AlkEthOH_r542 -CC1(CCOCC1O)C AlkEthOH_r543 -CC1(CCOCC1O)O AlkEthOH_r544 -CC1(CCOCCC(C1)(C)C)C AlkEthOH_r545 -CC1(CCOCCC(C1)(C)O)C AlkEthOH_r546 -CC1(CCOCCC(C1)(C)O)O AlkEthOH_r547 -CC1(CCOCCC(C1)O)C AlkEthOH_r548 -CC1(CCOCCC(C1)O)O AlkEthOH_r549 -CC1(CCOCCC(O1)(C)C)C AlkEthOH_r550 -CC1(CCOCCC(O1)(C)O)C AlkEthOH_r551 -CC1(CCOCCC(O1)(C)O)O AlkEthOH_r552 -CC1(CCOCCC(O1)O)C AlkEthOH_r553 -CC1(CCOCCC(O1)O)O AlkEthOH_r554 -CC1(CCOCCC1(C)C)C AlkEthOH_r555 -CC1(CCOCCC1(C)O)C AlkEthOH_r556 -CC1(CCOCCC1(C)O)O AlkEthOH_r557 -CC1(CCOCCC1O)C AlkEthOH_r558 -CC1(CCOCCC1O)O AlkEthOH_r559 -CC1(CCOCCCCO1)C AlkEthOH_r560 -CC1(CCOCCCCO1)O AlkEthOH_r561 -CC1(CCOCCCO1)C AlkEthOH_r562 -CC1(CCOCCCO1)O AlkEthOH_r563 -CC1(CCOCCO1)C AlkEthOH_r564 -CC1(CCOCCO1)O AlkEthOH_r565 -CC1(CCOCCOC1)C AlkEthOH_r566 -CC1(CCOCCOC1)O AlkEthOH_r567 -CC1(CCOCCOO1)C AlkEthOH_r568 -CC1(CCOCCOO1)O AlkEthOH_r569 -CC1(CCOCO1)C AlkEthOH_r570 -CC1(CCOCO1)O AlkEthOH_r571 -CC1(CCOCOC(C1)(C)C)C AlkEthOH_r572 -CC1(CCOCOC(C1)(C)O)C AlkEthOH_r573 -CC1(CCOCOC(C1)(C)O)O AlkEthOH_r574 -CC1(CCOCOC(C1)O)C AlkEthOH_r575 -CC1(CCOCOC(C1)O)O AlkEthOH_r576 -CC1(CCOCOC(O1)(C)C)C AlkEthOH_r577 -CC1(CCOCOC(O1)(C)O)C AlkEthOH_r578 -CC1(CCOCOC(O1)(C)O)O AlkEthOH_r579 -CC1(CCOCOC(O1)O)C AlkEthOH_r580 -CC1(CCOCOC(O1)O)O AlkEthOH_r581 -CC1(CCOCOC1(C)C)C AlkEthOH_r582 -CC1(CCOCOC1(C)O)C AlkEthOH_r583 -CC1(CCOCOC1(C)O)O AlkEthOH_r584 -CC1(CCOCOC1)C AlkEthOH_r585 -CC1(CCOCOC1)O AlkEthOH_r586 -CC1(CCOCOC1O)C AlkEthOH_r587 -CC1(CCOCOC1O)O AlkEthOH_r588 -CC1(CCOCOO1)C AlkEthOH_r589 -CC1(CCOCOO1)O AlkEthOH_r590 -CC1(CCOO1)C AlkEthOH_r591 -CC1(CCOO1)O AlkEthOH_r592 -CC1(CCOOC1)C AlkEthOH_r593 -CC1(CCOOC1)O AlkEthOH_r594 -CC1(CO1)C AlkEthOH_r595 -CC1(CO1)O AlkEthOH_r596 -CC1(COC(O1)(C)C)C AlkEthOH_r597 -CC1(COC(O1)(C)O)C AlkEthOH_r598 -CC1(COC(O1)(C)O)O AlkEthOH_r599 -CC1(COC(O1)O)C AlkEthOH_r600 -CC1(COC(O1)O)O AlkEthOH_r601 -CC1(COC1(C)C)C AlkEthOH_r602 -CC1(COC1(C)O)C AlkEthOH_r603 -CC1(COC1(C)O)O AlkEthOH_r604 -CC1(COC1)C AlkEthOH_r605 -CC1(COC1)O AlkEthOH_r606 -CC1(COC1O)C AlkEthOH_r607 -CC1(COC1O)O AlkEthOH_r608 -CC1(COCCC(O1)(C)O)C AlkEthOH_r609 -CC1(COCCC(O1)O)C AlkEthOH_r610 -CC1(COCCC(O1)O)O AlkEthOH_r611 -CC1(COCCC1(C)O)C AlkEthOH_r612 -CC1(COCCC1O)C AlkEthOH_r613 -CC1(COCCC1O)O AlkEthOH_r614 -CC1(COCCCC(O1)(C)O)C AlkEthOH_r615 -CC1(COCCCC(O1)O)C AlkEthOH_r616 -CC1(COCCCC(O1)O)O AlkEthOH_r617 -CC1(COCCCC1(C)O)C AlkEthOH_r618 -CC1(COCCCC1O)C AlkEthOH_r619 -CC1(COCCCC1O)O AlkEthOH_r620 -CC1(COCCCCC(O1)(C)O)C AlkEthOH_r621 -CC1(COCCCCC(O1)O)C AlkEthOH_r622 -CC1(COCCCCC(O1)O)O AlkEthOH_r623 -CC1(COCCCCC1(C)O)C AlkEthOH_r624 -CC1(COCCCCC1O)C AlkEthOH_r625 -CC1(COCCCCC1O)O AlkEthOH_r626 -CC1(COCCCCCO1)C AlkEthOH_r627 -CC1(COCCCCCO1)O AlkEthOH_r628 -CC1(COCCCCO1)C AlkEthOH_r629 -CC1(COCCCCO1)O AlkEthOH_r630 -CC1(COCCCCOC1)C AlkEthOH_r631 -CC1(COCCCCOC1)O AlkEthOH_r632 -CC1(COCCCCOO1)C AlkEthOH_r633 -CC1(COCCCCOO1)O AlkEthOH_r634 -CC1(COCCCO1)C AlkEthOH_r635 -CC1(COCCCO1)O AlkEthOH_r636 -CC1(COCCCOC(O1)(C)C)C AlkEthOH_r637 -CC1(COCCCOC(O1)(C)O)C AlkEthOH_r638 -CC1(COCCCOC(O1)(C)O)O AlkEthOH_r639 -CC1(COCCCOC(O1)O)C AlkEthOH_r640 -CC1(COCCCOC(O1)O)O AlkEthOH_r641 -CC1(COCCCOC1(C)C)C AlkEthOH_r642 -CC1(COCCCOC1(C)O)C AlkEthOH_r643 -CC1(COCCCOC1(C)O)O AlkEthOH_r644 -CC1(COCCCOC1)C AlkEthOH_r645 -CC1(COCCCOC1)O AlkEthOH_r646 -CC1(COCCCOC1O)C AlkEthOH_r647 -CC1(COCCCOC1O)O AlkEthOH_r648 -CC1(COCCCOO1)C AlkEthOH_r649 -CC1(COCCCOO1)O AlkEthOH_r650 -CC1(COCCO1)C AlkEthOH_r651 -CC1(COCCO1)O AlkEthOH_r652 -CC1(COCCOC(O1)(C)C)C AlkEthOH_r653 -CC1(COCCOC(O1)(C)O)C AlkEthOH_r654 -CC1(COCCOC(O1)(C)O)O AlkEthOH_r655 -CC1(COCCOC(O1)O)C AlkEthOH_r656 -CC1(COCCOC(O1)O)O AlkEthOH_r657 -CC1(COCCOC1(C)C)C AlkEthOH_r658 -CC1(COCCOC1(C)O)C AlkEthOH_r659 -CC1(COCCOC1(C)O)O AlkEthOH_r660 -CC1(COCCOC1)C AlkEthOH_r661 -CC1(COCCOC1)O AlkEthOH_r662 -CC1(COCCOC1O)C AlkEthOH_r663 -CC1(COCCOC1O)O AlkEthOH_r664 -CC1(COCCOCO1)C AlkEthOH_r665 -CC1(COCCOCO1)O AlkEthOH_r666 -CC1(COCCOO1)C AlkEthOH_r667 -CC1(COCCOO1)O AlkEthOH_r668 -CC1(COCO1)C AlkEthOH_r669 -CC1(COCO1)O AlkEthOH_r670 -CC1(COCOCCCO1)C AlkEthOH_r671 -CC1(COCOCCCO1)O AlkEthOH_r672 -CC1(COCOCCO1)C AlkEthOH_r673 -CC1(COCOCCO1)O AlkEthOH_r674 -CC1(COO1)C AlkEthOH_r675 -CC1(COO1)O AlkEthOH_r676 -CC1(COOCCCCO1)C AlkEthOH_r677 -CC1(COOCCCCO1)O AlkEthOH_r678 -CC1(COOCCO1)C AlkEthOH_r679 -CC1(COOCCO1)O AlkEthOH_r680 -CC1(OCC(O1)(C)O)C AlkEthOH_r681 -CC1(OCC(O1)O)C AlkEthOH_r682 -CC1(OCC(O1)O)O AlkEthOH_r683 -CC1(OCCC(O1)(C)O)C AlkEthOH_r684 -CC1(OCCC(O1)O)C AlkEthOH_r685 -CC1(OCCC(O1)O)O AlkEthOH_r686 -CC1(OCCCC(O1)(C)O)C AlkEthOH_r687 -CC1(OCCCC(O1)O)C AlkEthOH_r688 -CC1(OCCCC(O1)O)O AlkEthOH_r689 -CC1(OCCCCC(O1)(C)O)C AlkEthOH_r690 -CC1(OCCCCC(O1)O)C AlkEthOH_r691 -CC1(OCCCCC(O1)O)O AlkEthOH_r692 -CC1(OCCCCCC(O1)(C)O)C AlkEthOH_r693 -CC1(OCCCCCC(O1)O)C AlkEthOH_r694 -CC1(OCCCCCC(O1)O)O AlkEthOH_r695 -CC1(OCCCCCCO1)C AlkEthOH_r696 -CC1(OCCCCCCO1)O AlkEthOH_r697 -CC1(OCCCCCO1)C AlkEthOH_r698 -CC1(OCCCCCO1)O AlkEthOH_r699 -CC1(OCCCCCOO1)C AlkEthOH_r700 -CC1(OCCCCCOO1)O AlkEthOH_r701 -CC1(OCCCCO1)C AlkEthOH_r702 -CC1(OCCCCO1)O AlkEthOH_r703 -CC1(OCCCCOC(O1)(C)C)C AlkEthOH_r704 -CC1(OCCCCOC(O1)(C)O)C AlkEthOH_r705 -CC1(OCCCCOC(O1)(C)O)O AlkEthOH_r706 -CC1(OCCCCOC(O1)O)C AlkEthOH_r707 -CC1(OCCCCOC(O1)O)O AlkEthOH_r708 -CC1(OCCCCOO1)C AlkEthOH_r709 -CC1(OCCCCOO1)O AlkEthOH_r710 -CC1(OCCCO1)C AlkEthOH_r711 -CC1(OCCCO1)O AlkEthOH_r712 -CC1(OCCCOCC(O1)(C)O)C AlkEthOH_r713 -CC1(OCCCOCC(O1)O)C AlkEthOH_r714 -CC1(OCCCOCC(O1)O)O AlkEthOH_r715 -CC1(OCCCOCO1)C AlkEthOH_r716 -CC1(OCCCOCO1)O AlkEthOH_r717 -CC1(OCCCOO1)C AlkEthOH_r718 -CC1(OCCCOO1)O AlkEthOH_r719 -CC1(OCCO1)C AlkEthOH_r720 -CC1(OCCO1)O AlkEthOH_r721 -CC1(OCCOC(O1)(C)C)C AlkEthOH_r722 -CC1(OCCOC(O1)(C)O)C AlkEthOH_r723 -CC1(OCCOC(O1)(C)O)O AlkEthOH_r724 -CC1(OCCOC(O1)O)C AlkEthOH_r725 -CC1(OCCOC(O1)O)O AlkEthOH_r726 -CC1(OCCOCC(O1)(C)O)C AlkEthOH_r727 -CC1(OCCOCC(O1)O)C AlkEthOH_r728 -CC1(OCCOCC(O1)O)O AlkEthOH_r729 -CC1(OCCOCO1)C AlkEthOH_r730 -CC1(OCCOCO1)O AlkEthOH_r731 -CC1(OCCOO1)C AlkEthOH_r732 -CC1(OCCOO1)O AlkEthOH_r733 -CC1(OCO1)C AlkEthOH_r734 -CC1(OCO1)O AlkEthOH_r735 -CC1(OCOCCC(O1)(C)O)C AlkEthOH_r736 -CC1(OCOCCC(O1)O)C AlkEthOH_r737 -CC1(OCOCCC(O1)O)O AlkEthOH_r738 -CC1C(CCCCCO1)(C)C AlkEthOH_r739 -CC1C(CCCCCO1)(C)O AlkEthOH_r740 -CC1C(CCCCCO1)O AlkEthOH_r741 -CC1C(CCCCO1)(C)C AlkEthOH_r742 -CC1C(CCCCO1)(C)O AlkEthOH_r743 -CC1C(CCCCO1)O AlkEthOH_r744 -CC1C(CCCO1)(C)C AlkEthOH_r745 -CC1C(CCCO1)(C)O AlkEthOH_r746 -CC1C(CCCO1)O AlkEthOH_r747 -CC1C(CCO1)(C)C AlkEthOH_r748 -CC1C(CCO1)(C)O AlkEthOH_r749 -CC1C(CCO1)O AlkEthOH_r750 -CC1C(CCOCO1)(C)C AlkEthOH_r751 -CC1C(CCOCO1)(C)O AlkEthOH_r752 -CC1C(CCOCO1)O AlkEthOH_r753 -CC1C(CO1)(C)C AlkEthOH_r754 -CC1C(CO1)(C)O AlkEthOH_r755 -CC1C(CO1)O AlkEthOH_r756 -CC1C(COCCCO1)(C)C AlkEthOH_r757 -CC1C(COCCCO1)(C)O AlkEthOH_r758 -CC1C(COCCCO1)O AlkEthOH_r759 -CC1C(COCCO1)(C)C AlkEthOH_r760 -CC1C(COCCO1)(C)O AlkEthOH_r761 -CC1C(COCCO1)O AlkEthOH_r762 -CC1C(OCCCCO1)(C)C AlkEthOH_r763 -CC1C(OCCCCO1)(C)O AlkEthOH_r764 -CC1C(OCCCCO1)C AlkEthOH_r765 -CC1C(OCCCCO1)O AlkEthOH_r766 -CC1C(OCCO1)(C)C AlkEthOH_r767 -CC1C(OCCO1)(C)O AlkEthOH_r768 -CC1C(OCCO1)C AlkEthOH_r769 -CC1C(OCCO1)O AlkEthOH_r770 -CC1CC(C1)(C)C AlkEthOH_r771 -CC1CC(C1)(C)O AlkEthOH_r772 -CC1CC(C1)C AlkEthOH_r773 -CC1CC(C1)O AlkEthOH_r774 -CC1CC(CCCCCO1)(C)C AlkEthOH_r775 -CC1CC(CCCCCO1)(C)O AlkEthOH_r776 -CC1CC(CCCCCO1)O AlkEthOH_r777 -CC1CC(CCCCO1)(C)C AlkEthOH_r778 -CC1CC(CCCCO1)(C)O AlkEthOH_r779 -CC1CC(CCCCO1)O AlkEthOH_r780 -CC1CC(CCCCOC1)(C)C AlkEthOH_r781 -CC1CC(CCCCOC1)(C)O AlkEthOH_r782 -CC1CC(CCCCOC1)O AlkEthOH_r783 -CC1CC(CCCO1)(C)C AlkEthOH_r784 -CC1CC(CCCO1)(C)O AlkEthOH_r785 -CC1CC(CCCO1)O AlkEthOH_r786 -CC1CC(CCCOC1)(C)C AlkEthOH_r787 -CC1CC(CCCOC1)(C)O AlkEthOH_r788 -CC1CC(CCCOC1)O AlkEthOH_r789 -CC1CC(CCO1)(C)C AlkEthOH_r790 -CC1CC(CCO1)(C)O AlkEthOH_r791 -CC1CC(CCO1)O AlkEthOH_r792 -CC1CC(CCOC1)(C)C AlkEthOH_r793 -CC1CC(CCOC1)(C)O AlkEthOH_r794 -CC1CC(CCOC1)O AlkEthOH_r795 -CC1CC(CCOCO1)(C)C AlkEthOH_r796 -CC1CC(CCOCO1)(C)O AlkEthOH_r797 -CC1CC(CCOCO1)O AlkEthOH_r798 -CC1CC(CO1)(C)C AlkEthOH_r799 -CC1CC(CO1)(C)O AlkEthOH_r800 -CC1CC(CO1)O AlkEthOH_r801 -CC1CC(COCCCO1)(C)C AlkEthOH_r802 -CC1CC(COCCCO1)(C)O AlkEthOH_r803 -CC1CC(COCCCO1)O AlkEthOH_r804 -CC1CC(COCCO1)(C)C AlkEthOH_r805 -CC1CC(COCCO1)(C)O AlkEthOH_r806 -CC1CC(COCCO1)O AlkEthOH_r807 -CC1CC(O1)(C)C AlkEthOH_r808 -CC1CC(O1)(C)O AlkEthOH_r809 -CC1CC(O1)C AlkEthOH_r810 -CC1CC(O1)O AlkEthOH_r811 -CC1CC(OC1)(C)C AlkEthOH_r812 -CC1CC(OC1)(C)O AlkEthOH_r813 -CC1CC(OC1)C AlkEthOH_r814 -CC1CC(OC1)O AlkEthOH_r815 -CC1CC(OCCCCO1)(C)C AlkEthOH_r816 -CC1CC(OCCCCO1)(C)O AlkEthOH_r817 -CC1CC(OCCCCO1)C AlkEthOH_r818 -CC1CC(OCCCCO1)O AlkEthOH_r819 -CC1CC(OCCCOC1)(C)C AlkEthOH_r820 -CC1CC(OCCCOC1)(C)O AlkEthOH_r821 -CC1CC(OCCCOC1)C AlkEthOH_r822 -CC1CC(OCCCOC1)O AlkEthOH_r823 -CC1CC(OCCO1)(C)C AlkEthOH_r824 -CC1CC(OCCO1)(C)O AlkEthOH_r825 -CC1CC(OCCO1)C AlkEthOH_r826 -CC1CC(OCCO1)O AlkEthOH_r827 -CC1CC(OCCOC1)(C)C AlkEthOH_r828 -CC1CC(OCCOC1)(C)O AlkEthOH_r829 -CC1CC(OCCOC1)C AlkEthOH_r830 -CC1CC(OCCOC1)O AlkEthOH_r831 -CC1CC1 AlkEthOH_r832 -CC1CC1(C)C AlkEthOH_r833 -CC1CC1(C)O AlkEthOH_r834 -CC1CC1C AlkEthOH_r835 -CC1CC1O AlkEthOH_r836 -CC1CCC(C1)(C)C AlkEthOH_r837 -CC1CCC(C1)(C)O AlkEthOH_r838 -CC1CCC(C1)C AlkEthOH_r839 -CC1CCC(C1)O AlkEthOH_r840 -CC1CCC(O1)(C)C AlkEthOH_r841 -CC1CCC(O1)(C)O AlkEthOH_r842 -CC1CCC(O1)C AlkEthOH_r843 -CC1CCC(O1)O AlkEthOH_r844 -CC1CCC1 AlkEthOH_r845 -CC1CCC1(C)C AlkEthOH_r846 -CC1CCC1(C)O AlkEthOH_r847 -CC1CCC1C AlkEthOH_r848 -CC1CCC1O AlkEthOH_r849 -CC1CCCC(C1)(C)C AlkEthOH_r850 -CC1CCCC(C1)(C)O AlkEthOH_r851 -CC1CCCC(C1)C AlkEthOH_r852 -CC1CCCC(C1)O AlkEthOH_r853 -CC1CCCC(O1)(C)C AlkEthOH_r854 -CC1CCCC(O1)(C)O AlkEthOH_r855 -CC1CCCC(O1)C AlkEthOH_r856 -CC1CCCC(O1)O AlkEthOH_r857 -CC1CCCC1 AlkEthOH_r858 -CC1CCCC1(C)C AlkEthOH_r859 -CC1CCCC1(C)O AlkEthOH_r860 -CC1CCCC1C AlkEthOH_r861 -CC1CCCC1O AlkEthOH_r862 -CC1CCCCC(C1)(C)C AlkEthOH_r863 -CC1CCCCC(C1)(C)O AlkEthOH_r864 -CC1CCCCC(C1)C AlkEthOH_r865 -CC1CCCCC(C1)O AlkEthOH_r866 -CC1CCCCC(O1)(C)C AlkEthOH_r867 -CC1CCCCC(O1)(C)O AlkEthOH_r868 -CC1CCCCC(O1)C AlkEthOH_r869 -CC1CCCCC(O1)O AlkEthOH_r870 -CC1CCCCC1 AlkEthOH_r871 -CC1CCCCC1(C)C AlkEthOH_r872 -CC1CCCCC1(C)O AlkEthOH_r873 -CC1CCCCC1C AlkEthOH_r874 -CC1CCCCC1O AlkEthOH_r875 -CC1CCCCCC(C1)(C)C AlkEthOH_r876 -CC1CCCCCC(C1)(C)O AlkEthOH_r877 -CC1CCCCCC(C1)C AlkEthOH_r878 -CC1CCCCCC(C1)O AlkEthOH_r879 -CC1CCCCCC(O1)(C)C AlkEthOH_r880 -CC1CCCCCC(O1)(C)O AlkEthOH_r881 -CC1CCCCCC(O1)C AlkEthOH_r882 -CC1CCCCCC(O1)O AlkEthOH_r883 -CC1CCCCCC1 AlkEthOH_r884 -CC1CCCCCC1(C)C AlkEthOH_r885 -CC1CCCCCC1(C)O AlkEthOH_r886 -CC1CCCCCC1C AlkEthOH_r887 -CC1CCCCCC1O AlkEthOH_r888 -CC1CCCCCCC(C1)(C)C AlkEthOH_r889 -CC1CCCCCCC(C1)(C)O AlkEthOH_r890 -CC1CCCCCCC(C1)C AlkEthOH_r891 -CC1CCCCCCC(C1)O AlkEthOH_r892 -CC1CCCCCCC(O1)(C)C AlkEthOH_r893 -CC1CCCCCCC(O1)(C)O AlkEthOH_r894 -CC1CCCCCCC(O1)C AlkEthOH_r895 -CC1CCCCCCC(O1)O AlkEthOH_r896 -CC1CCCCCCC1 AlkEthOH_r897 -CC1CCCCCCC1(C)C AlkEthOH_r898 -CC1CCCCCCC1(C)O AlkEthOH_r899 -CC1CCCCCCC1C AlkEthOH_r900 -CC1CCCCCCC1O AlkEthOH_r901 -CC1CCCCCCCC1 AlkEthOH_r902 -CC1CCCCCCCO1 AlkEthOH_r903 -CC1CCCCCCO1 AlkEthOH_r904 -CC1CCCCCCOC1 AlkEthOH_r905 -CC1CCCCCCOO1 AlkEthOH_r906 -CC1CCCCCO1 AlkEthOH_r907 -CC1CCCCCOC(C1)(C)C AlkEthOH_r908 -CC1CCCCCOC(C1)(C)O AlkEthOH_r909 -CC1CCCCCOC(C1)C AlkEthOH_r910 -CC1CCCCCOC(C1)O AlkEthOH_r911 -CC1CCCCCOC(O1)(C)C AlkEthOH_r912 -CC1CCCCCOC(O1)(C)O AlkEthOH_r913 -CC1CCCCCOC(O1)C AlkEthOH_r914 -CC1CCCCCOC(O1)O AlkEthOH_r915 -CC1CCCCCOC1 AlkEthOH_r916 -CC1CCCCCOC1(C)C AlkEthOH_r917 -CC1CCCCCOC1(C)O AlkEthOH_r918 -CC1CCCCCOC1C AlkEthOH_r919 -CC1CCCCCOC1O AlkEthOH_r920 -CC1CCCCCOCC1 AlkEthOH_r921 -CC1CCCCCOO1 AlkEthOH_r922 -CC1CCCCCOOC1 AlkEthOH_r923 -CC1CCCCO1 AlkEthOH_r924 -CC1CCCCOC(C1)(C)C AlkEthOH_r925 -CC1CCCCOC(C1)(C)O AlkEthOH_r926 -CC1CCCCOC(C1)C AlkEthOH_r927 -CC1CCCCOC(C1)O AlkEthOH_r928 -CC1CCCCOC(O1)(C)C AlkEthOH_r929 -CC1CCCCOC(O1)(C)O AlkEthOH_r930 -CC1CCCCOC(O1)C AlkEthOH_r931 -CC1CCCCOC(O1)O AlkEthOH_r932 -CC1CCCCOC1 AlkEthOH_r933 -CC1CCCCOC1(C)C AlkEthOH_r934 -CC1CCCCOC1(C)O AlkEthOH_r935 -CC1CCCCOC1C AlkEthOH_r936 -CC1CCCCOC1O AlkEthOH_r937 -CC1CCCCOCC(C1)(C)C AlkEthOH_r938 -CC1CCCCOCC(C1)(C)O AlkEthOH_r939 -CC1CCCCOCC(C1)C AlkEthOH_r940 -CC1CCCCOCC(C1)O AlkEthOH_r941 -CC1CCCCOCC(O1)(C)C AlkEthOH_r942 -CC1CCCCOCC(O1)(C)O AlkEthOH_r943 -CC1CCCCOCC(O1)C AlkEthOH_r944 -CC1CCCCOCC(O1)O AlkEthOH_r945 -CC1CCCCOCC1 AlkEthOH_r946 -CC1CCCCOCC1(C)C AlkEthOH_r947 -CC1CCCCOCC1(C)O AlkEthOH_r948 -CC1CCCCOCC1C AlkEthOH_r949 -CC1CCCCOCC1O AlkEthOH_r950 -CC1CCCCOCCC1 AlkEthOH_r951 -CC1CCCCOCO1 AlkEthOH_r952 -CC1CCCCOCOC1 AlkEthOH_r953 -CC1CCCCOO1 AlkEthOH_r954 -CC1CCCCOOC1 AlkEthOH_r955 -CC1CCCO1 AlkEthOH_r956 -CC1CCCOC(C1)(C)C AlkEthOH_r957 -CC1CCCOC(C1)(C)O AlkEthOH_r958 -CC1CCCOC(C1)C AlkEthOH_r959 -CC1CCCOC(C1)O AlkEthOH_r960 -CC1CCCOC(O1)(C)C AlkEthOH_r961 -CC1CCCOC(O1)(C)O AlkEthOH_r962 -CC1CCCOC(O1)C AlkEthOH_r963 -CC1CCCOC(O1)O AlkEthOH_r964 -CC1CCCOC1 AlkEthOH_r965 -CC1CCCOC1(C)C AlkEthOH_r966 -CC1CCCOC1(C)O AlkEthOH_r967 -CC1CCCOC1C AlkEthOH_r968 -CC1CCCOC1O AlkEthOH_r969 -CC1CCCOCC(C1)(C)C AlkEthOH_r970 -CC1CCCOCC(C1)(C)O AlkEthOH_r971 -CC1CCCOCC(C1)C AlkEthOH_r972 -CC1CCCOCC(C1)O AlkEthOH_r973 -CC1CCCOCC(O1)(C)C AlkEthOH_r974 -CC1CCCOCC(O1)(C)O AlkEthOH_r975 -CC1CCCOCC(O1)C AlkEthOH_r976 -CC1CCCOCC(O1)O AlkEthOH_r977 -CC1CCCOCC1 AlkEthOH_r978 -CC1CCCOCC1(C)C AlkEthOH_r979 -CC1CCCOCC1(C)O AlkEthOH_r980 -CC1CCCOCC1C AlkEthOH_r981 -CC1CCCOCC1O AlkEthOH_r982 -CC1CCCOCCC1 AlkEthOH_r983 -CC1CCCOCCCO1 AlkEthOH_r984 -CC1CCCOCCO1 AlkEthOH_r985 -CC1CCCOCO1 AlkEthOH_r986 -CC1CCCOCOC1 AlkEthOH_r987 -CC1CCCOO1 AlkEthOH_r988 -CC1CCCOOC1 AlkEthOH_r989 -CC1CCO1 AlkEthOH_r990 -CC1CCOC(C1)(C)C AlkEthOH_r991 -CC1CCOC(C1)(C)O AlkEthOH_r992 -CC1CCOC(C1)C AlkEthOH_r993 -CC1CCOC(C1)O AlkEthOH_r994 -CC1CCOC(O1)(C)C AlkEthOH_r995 -CC1CCOC(O1)(C)O AlkEthOH_r996 -CC1CCOC(O1)C AlkEthOH_r997 -CC1CCOC(O1)O AlkEthOH_r998 -CC1CCOC1 AlkEthOH_r999 -CC1CCOC1(C)C AlkEthOH_r1000 -CC1CCOC1(C)O AlkEthOH_r1001 -CC1CCOC1C AlkEthOH_r1002 -CC1CCOC1O AlkEthOH_r1003 -CC1CCOCC(C1)(C)C AlkEthOH_r1004 -CC1CCOCC(C1)(C)O AlkEthOH_r1005 -CC1CCOCC(C1)C AlkEthOH_r1006 -CC1CCOCC(C1)O AlkEthOH_r1007 -CC1CCOCC(O1)(C)C AlkEthOH_r1008 -CC1CCOCC(O1)(C)O AlkEthOH_r1009 -CC1CCOCC(O1)C AlkEthOH_r1010 -CC1CCOCC(O1)O AlkEthOH_r1011 -CC1CCOCC1 AlkEthOH_r1012 -CC1CCOCC1(C)C AlkEthOH_r1013 -CC1CCOCC1(C)O AlkEthOH_r1014 -CC1CCOCC1C AlkEthOH_r1015 -CC1CCOCC1O AlkEthOH_r1016 -CC1CCOCCC(C1)(C)C AlkEthOH_r1017 -CC1CCOCCC(C1)(C)O AlkEthOH_r1018 -CC1CCOCCC(C1)C AlkEthOH_r1019 -CC1CCOCCC(C1)O AlkEthOH_r1020 -CC1CCOCCC(O1)(C)C AlkEthOH_r1021 -CC1CCOCCC(O1)(C)O AlkEthOH_r1022 -CC1CCOCCC(O1)C AlkEthOH_r1023 -CC1CCOCCC(O1)O AlkEthOH_r1024 -CC1CCOCCC1(C)C AlkEthOH_r1025 -CC1CCOCCC1(C)O AlkEthOH_r1026 -CC1CCOCCC1C AlkEthOH_r1027 -CC1CCOCCC1O AlkEthOH_r1028 -CC1CCOCCCCO1 AlkEthOH_r1029 -CC1CCOCCCO1 AlkEthOH_r1030 -CC1CCOCCO1 AlkEthOH_r1031 -CC1CCOCCOC1 AlkEthOH_r1032 -CC1CCOCCOO1 AlkEthOH_r1033 -CC1CCOCO1 AlkEthOH_r1034 -CC1CCOCOC(C1)(C)C AlkEthOH_r1035 -CC1CCOCOC(C1)(C)O AlkEthOH_r1036 -CC1CCOCOC(C1)C AlkEthOH_r1037 -CC1CCOCOC(C1)O AlkEthOH_r1038 -CC1CCOCOC(O1)(C)C AlkEthOH_r1039 -CC1CCOCOC(O1)(C)O AlkEthOH_r1040 -CC1CCOCOC(O1)C AlkEthOH_r1041 -CC1CCOCOC(O1)O AlkEthOH_r1042 -CC1CCOCOC1 AlkEthOH_r1043 -CC1CCOCOC1(C)C AlkEthOH_r1044 -CC1CCOCOC1(C)O AlkEthOH_r1045 -CC1CCOCOC1C AlkEthOH_r1046 -CC1CCOCOC1O AlkEthOH_r1047 -CC1CCOCOO1 AlkEthOH_r1048 -CC1CCOO1 AlkEthOH_r1049 -CC1CCOOC1 AlkEthOH_r1050 -CC1CO1 AlkEthOH_r1051 -CC1COC(O1)(C)C AlkEthOH_r1052 -CC1COC(O1)(C)O AlkEthOH_r1053 -CC1COC(O1)C AlkEthOH_r1054 -CC1COC(O1)O AlkEthOH_r1055 -CC1COC1 AlkEthOH_r1056 -CC1COC1(C)C AlkEthOH_r1057 -CC1COC1(C)O AlkEthOH_r1058 -CC1COC1C AlkEthOH_r1059 -CC1COC1O AlkEthOH_r1060 -CC1COCCC(O1)(C)C AlkEthOH_r1061 -CC1COCCC(O1)(C)O AlkEthOH_r1062 -CC1COCCC(O1)O AlkEthOH_r1063 -CC1COCCC1(C)C AlkEthOH_r1064 -CC1COCCC1(C)O AlkEthOH_r1065 -CC1COCCC1O AlkEthOH_r1066 -CC1COCCCC(O1)(C)C AlkEthOH_r1067 -CC1COCCCC(O1)(C)O AlkEthOH_r1068 -CC1COCCCC(O1)O AlkEthOH_r1069 -CC1COCCCC1(C)C AlkEthOH_r1070 -CC1COCCCC1(C)O AlkEthOH_r1071 -CC1COCCCC1O AlkEthOH_r1072 -CC1COCCCCC(O1)(C)C AlkEthOH_r1073 -CC1COCCCCC(O1)(C)O AlkEthOH_r1074 -CC1COCCCCC(O1)O AlkEthOH_r1075 -CC1COCCCCC1(C)C AlkEthOH_r1076 -CC1COCCCCC1(C)O AlkEthOH_r1077 -CC1COCCCCC1O AlkEthOH_r1078 -CC1COCCCCCO1 AlkEthOH_r1079 -CC1COCCCCO1 AlkEthOH_r1080 -CC1COCCCCOC1 AlkEthOH_r1081 -CC1COCCCCOO1 AlkEthOH_r1082 -CC1COCCCO1 AlkEthOH_r1083 -CC1COCCCOC(O1)(C)C AlkEthOH_r1084 -CC1COCCCOC(O1)(C)O AlkEthOH_r1085 -CC1COCCCOC(O1)C AlkEthOH_r1086 -CC1COCCCOC(O1)O AlkEthOH_r1087 -CC1COCCCOC1 AlkEthOH_r1088 -CC1COCCCOC1(C)C AlkEthOH_r1089 -CC1COCCCOC1(C)O AlkEthOH_r1090 -CC1COCCCOC1C AlkEthOH_r1091 -CC1COCCCOC1O AlkEthOH_r1092 -CC1COCCCOO1 AlkEthOH_r1093 -CC1COCCO1 AlkEthOH_r1094 -CC1COCCOC(O1)(C)C AlkEthOH_r1095 -CC1COCCOC(O1)(C)O AlkEthOH_r1096 -CC1COCCOC(O1)C AlkEthOH_r1097 -CC1COCCOC(O1)O AlkEthOH_r1098 -CC1COCCOC1 AlkEthOH_r1099 -CC1COCCOC1(C)C AlkEthOH_r1100 -CC1COCCOC1(C)O AlkEthOH_r1101 -CC1COCCOC1C AlkEthOH_r1102 -CC1COCCOC1O AlkEthOH_r1103 -CC1COCCOCO1 AlkEthOH_r1104 -CC1COCCOO1 AlkEthOH_r1105 -CC1COCO1 AlkEthOH_r1106 -CC1COCOCCCO1 AlkEthOH_r1107 -CC1COCOCCO1 AlkEthOH_r1108 -CC1COO1 AlkEthOH_r1109 -CC1COOCCCCO1 AlkEthOH_r1110 -CC1COOCCO1 AlkEthOH_r1111 -CC1OCC(O1)(C)C AlkEthOH_r1112 -CC1OCC(O1)(C)O AlkEthOH_r1113 -CC1OCC(O1)O AlkEthOH_r1114 -CC1OCCC(O1)(C)C AlkEthOH_r1115 -CC1OCCC(O1)(C)O AlkEthOH_r1116 -CC1OCCC(O1)O AlkEthOH_r1117 -CC1OCCCC(O1)(C)C AlkEthOH_r1118 -CC1OCCCC(O1)(C)O AlkEthOH_r1119 -CC1OCCCC(O1)O AlkEthOH_r1120 -CC1OCCCCC(O1)(C)C AlkEthOH_r1121 -CC1OCCCCC(O1)(C)O AlkEthOH_r1122 -CC1OCCCCC(O1)O AlkEthOH_r1123 -CC1OCCCCCC(O1)(C)C AlkEthOH_r1124 -CC1OCCCCCC(O1)(C)O AlkEthOH_r1125 -CC1OCCCCCC(O1)O AlkEthOH_r1126 -CC1OCCCCCCO1 AlkEthOH_r1127 -CC1OCCCCCO1 AlkEthOH_r1128 -CC1OCCCCCOO1 AlkEthOH_r1129 -CC1OCCCCO1 AlkEthOH_r1130 -CC1OCCCCOC(O1)(C)C AlkEthOH_r1131 -CC1OCCCCOC(O1)(C)O AlkEthOH_r1132 -CC1OCCCCOC(O1)C AlkEthOH_r1133 -CC1OCCCCOC(O1)O AlkEthOH_r1134 -CC1OCCCCOO1 AlkEthOH_r1135 -CC1OCCCO1 AlkEthOH_r1136 -CC1OCCCOCC(O1)(C)C AlkEthOH_r1137 -CC1OCCCOCC(O1)(C)O AlkEthOH_r1138 -CC1OCCCOCC(O1)O AlkEthOH_r1139 -CC1OCCCOCO1 AlkEthOH_r1140 -CC1OCCCOO1 AlkEthOH_r1141 -CC1OCCO1 AlkEthOH_r1142 -CC1OCCOC(O1)(C)C AlkEthOH_r1143 -CC1OCCOC(O1)(C)O AlkEthOH_r1144 -CC1OCCOC(O1)C AlkEthOH_r1145 -CC1OCCOC(O1)O AlkEthOH_r1146 -CC1OCCOCC(O1)(C)C AlkEthOH_r1147 -CC1OCCOCC(O1)(C)O AlkEthOH_r1148 -CC1OCCOCC(O1)O AlkEthOH_r1149 -CC1OCCOCO1 AlkEthOH_r1150 -CC1OCCOO1 AlkEthOH_r1151 -CC1OCO1 AlkEthOH_r1152 -CC1OCOCCC(O1)(C)C AlkEthOH_r1153 -CC1OCOCCC(O1)(C)O AlkEthOH_r1154 -CC1OCOCCC(O1)O AlkEthOH_r1155 diff --git a/espaloma/data/collection.py b/espaloma/data/collection.py index d7378e78..1a9eea31 100644 --- a/espaloma/data/collection.py +++ b/espaloma/data/collection.py @@ -23,12 +23,19 @@ def alkethoh(*args, **kwargs): import pandas as pd - path = os.path.dirname(esp.__file__) + "/data/alkethoh.smi" - df = pd.read_csv(path) + + df = pd.concat( + [ + pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_rings.smi"), + pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_chain.smi"), + ], + ) + smiles = df.iloc[:, 0] return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) + def zinc(first=-1, *args, **kwargs): import tarfile from os.path import exists From 465cbecc1e1196562a81bcf3b0aebb8a3c23b9ff Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 30 Mar 2021 16:55:44 -0400 Subject: [PATCH 18/39] coefficients for linear mixture --- espaloma/graphs/legacy_force_field.py | 8 ++++---- espaloma/mm/bond.py | 2 +- espaloma/mm/functional.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/espaloma/graphs/legacy_force_field.py b/espaloma/graphs/legacy_force_field.py index f9e62cfd..919cf384 100644 --- a/espaloma/graphs/legacy_force_field.py +++ b/espaloma/graphs/legacy_force_field.py @@ -177,8 +177,8 @@ def _parametrize_gaff(self, g, n_max_phases=6): ) mol = g.mol - mol.assign_partial_charges("formal_charge") - + # mol.assign_partial_charges("formal_charge") + # create system sys = system_generator.create_system( topology=mol.to_topology().to_openmm(), @@ -302,7 +302,7 @@ def _parametrize_gaff(self, g, n_max_phases=6): if (idx0, idx1, idx2, idx3) in torsion_lookup: position = torsion_lookup[(idx0, idx1, idx2, idx3)] for sub_idx in range(n_max_phases): - if torsion_ks[position, sub_idx] == 0: + if torsion_ks[position, sub_idx] == 0: torsion_ks[position, sub_idx] = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT) torsion_phases[position, sub_idx] = phase.value_in_unit(esp.units.ANGLE_UNIT) torsion_periodicities[position, sub_idx] = periodicity @@ -374,7 +374,7 @@ def apply_torsion(node, n_max_phases=6): ''' return g - + def _parametrize_smirnoff(self, g): # mol = self._convert_to_off(mol) diff --git a/espaloma/mm/bond.py b/espaloma/mm/bond.py index 5530c0f0..df7e4fcb 100644 --- a/espaloma/mm/bond.py +++ b/espaloma/mm/bond.py @@ -44,4 +44,4 @@ def linear_mixture_bond(x, coefficients, phases): """ Bond energy with Linear basis function. """ - return esp.mm.functional.linear_mixture(x=x, coefficients=coefficients, phases=phases) + return 0.5 * esp.mm.functional.linear_mixture(x=x, coefficients=coefficients, phases=phases) diff --git a/espaloma/mm/functional.py b/espaloma/mm/functional.py index b58fd8bd..8d46bedb 100644 --- a/espaloma/mm/functional.py +++ b/espaloma/mm/functional.py @@ -281,6 +281,6 @@ def linear_mixture(x, coefficients, phases=[0.0, 1.0]): u1 = k1 * (x - b1) ** 2 u2 = k2 * (x - b2) ** 2 - u = u1 + u2 # - k1 * b1 ** 2 - k2 ** b2 ** 2 + b ** 2 + u = 0.5 * (u1 + u2) # - k1 * b1 ** 2 - k2 ** b2 ** 2 + b ** 2 return u From e51647f76b48485bd90bc811a45215852ead41cb Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 30 Mar 2021 16:58:51 -0400 Subject: [PATCH 19/39] gaff param --- espaloma/graphs/legacy_force_field.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espaloma/graphs/legacy_force_field.py b/espaloma/graphs/legacy_force_field.py index f9e62cfd..49888c39 100644 --- a/espaloma/graphs/legacy_force_field.py +++ b/espaloma/graphs/legacy_force_field.py @@ -177,7 +177,7 @@ def _parametrize_gaff(self, g, n_max_phases=6): ) mol = g.mol - mol.assign_partial_charges("formal_charge") + # mol.assign_partial_charges("formal_charge") # create system sys = system_generator.create_system( From 26942a4c7f171474eea7c4949265441597a851f1 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Wed, 31 Mar 2021 11:33:07 -0400 Subject: [PATCH 20/39] unit test on consistency between linear mixture and original --- espaloma/mm/angle.py | 2 +- espaloma/mm/energy.py | 4 +- espaloma/mm/tests/test_linear_combination.py | 78 ++++++++++++++++++++ 3 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 espaloma/mm/tests/test_linear_combination.py diff --git a/espaloma/mm/angle.py b/espaloma/mm/angle.py index e73979bb..fc745bce 100644 --- a/espaloma/mm/angle.py +++ b/espaloma/mm/angle.py @@ -37,4 +37,4 @@ def linear_mixture_angle(x, coefficients, phases): """ Angle energy with Linear basis function. """ - return esp.mm.functional.linear_mixture(x=x, coefficients=coefficients, phases=phases) + return 0.5 * esp.mm.functional.linear_mixture(x=x, coefficients=coefficients, phases=phases) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 36244526..9a11b5db 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -184,7 +184,7 @@ def energy_in_graph( if "coefficients%s" % suffix in g.nodes["n2"].data: g.apply_nodes( - lambda node: apply_bond_linear_mixture(node, suffix=suffix, phases=[1.5, 4.0]), ntype="n2", + lambda node: apply_bond_linear_mixture(node, suffix=suffix, phases=[1.5, 6.0]), ntype="n2", ) else: g.apply_nodes( @@ -245,7 +245,7 @@ def energy_in_graph( lambda node: { "u%s" % suffix: sum( - node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data + node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data ) }, ntype="g", diff --git a/espaloma/mm/tests/test_linear_combination.py b/espaloma/mm/tests/test_linear_combination.py new file mode 100644 index 00000000..554c8439 --- /dev/null +++ b/espaloma/mm/tests/test_linear_combination.py @@ -0,0 +1,78 @@ +import pytest + +def test_linear_combination(): + import torch + import espaloma as esp + assert esp.mm.functional.linear_mixture( + 0.0, + torch.tensor([[0.0, 0.0]]), + ) == 0.0 + assert esp.mm.functional.linear_mixture( + 1.0, + torch.tensor([[1.0, 1.0]]), + [0.0, 2.0], + ) == 1.0 + +def test_consistency(): + import torch + import espaloma as esp + g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") + + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) + g = simulation.run(g, in_place=True) + + g.nodes['n2'].data['coefficients'] = torch.randn( + g.heterograph.number_of_nodes("n2"), 2 + ).exp() + + g.nodes['n3'].data['coefficients'] = torch.randn( + g.heterograph.number_of_nodes("n3"), 2 + ).exp() + + esp.mm.geometry.geometry_in_graph(g.heterograph) + + esp.mm.energy.energy_in_graph(g.heterograph, terms=['n2', 'n3']) + + u0_2 = g.nodes['n2'].data['u'] - g.nodes['n2'].data['u'].mean(dim=1, keepdims=True) + u0_3 = g.nodes['n3'].data['u'] - g.nodes['n3'].data['u'].mean(dim=1, keepdims=True) + u0 = g.nodes['g'].data['u'] - g.nodes['g'].data['u'].mean(dim=1, keepdims=True) + + g.nodes['n2'].data['k'], g.nodes['n2'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n2'].data['coefficients'][:, 0][:, None], + g.nodes['n2'].data['coefficients'][:, 1][:, None], + 1.5, 6.0, + ) + + import math + g.nodes['n3'].data['k'], g.nodes['n3'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n3'].data['coefficients'][:, 0][:, None], + g.nodes['n3'].data['coefficients'][:, 1][:, None], + 0.0, math.pi, + ) + + g.nodes['n2'].data.pop('coefficients') + g.nodes['n3'].data.pop('coefficients') + + esp.mm.energy.energy_in_graph(g.heterograph, terms=['n2', 'n3']) + + u1_2 = g.nodes['n2'].data['u'] - g.nodes['n2'].data['u'].mean(dim=1, keepdims=True) + u1_3 = g.nodes['n3'].data['u'] - g.nodes['n3'].data['u'].mean(dim=1, keepdims=True) + u1 = g.nodes['g'].data['u'] - g.nodes['g'].data['u'].mean(dim=1, keepdims=True) + + import numpy.testing as npt + + npt.assert_almost_equal( + u0_2.detach().numpy(), u1_2.detach().numpy(), + decimal=3, + ) + + npt.assert_almost_equal( + u0_3.detach().numpy(), u1_3.detach().numpy(), + decimal=3, + ) + + npt.assert_almost_equal( + u0.detach().numpy(), u1.detach().numpy(), + decimal=3, + ) From e84310d87dbc5822ae822e70e6e2c40cbbb2030f Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 31 Mar 2021 11:43:04 -0400 Subject: [PATCH 21/39] linear basis --- espaloma/mm/energy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 36244526..3bd6959c 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -184,7 +184,7 @@ def energy_in_graph( if "coefficients%s" % suffix in g.nodes["n2"].data: g.apply_nodes( - lambda node: apply_bond_linear_mixture(node, suffix=suffix, phases=[1.5, 4.0]), ntype="n2", + lambda node: apply_bond_linear_mixture(node, suffix=suffix, phases=[1.5, 6.0]), ntype="n2", ) else: g.apply_nodes( From 57df1597ee4f7716916ce57c5f8ce390520cf283 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Thu, 8 Apr 2021 17:10:56 -0400 Subject: [PATCH 22/39] md17 experiments --- scripts/md17/run.py | 11 +++++++++++ scripts/md17/run.sh | 5 +++++ 2 files changed, 16 insertions(+) create mode 100644 scripts/md17/run.py create mode 100644 scripts/md17/run.sh diff --git a/scripts/md17/run.py b/scripts/md17/run.py new file mode 100644 index 00000000..0bbc86b9 --- /dev/null +++ b/scripts/md17/run.py @@ -0,0 +1,11 @@ +import espaloma as esp + + +def run(name): + g = esp.data.md17_utils.get_molecule(name) + g.save(name) + +if __name__ == "__main__": + import sys + run(sys.argv[1]) + diff --git a/scripts/md17/run.sh b/scripts/md17/run.sh new file mode 100644 index 00000000..6bd0a2d0 --- /dev/null +++ b/scripts/md17/run.sh @@ -0,0 +1,5 @@ +for name in benzene uracil naphthalene aspirin salicylic malonaldehyde ethanol\ + toluene paracetamol azobenzene +do + bsub -W 8:00 -R "rusage[mem=10]" -o %J.stdout python run.py $name +done From f01e763be0f0c8f16dc744027516d88028a32112 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Thu, 8 Apr 2021 19:59:07 -0400 Subject: [PATCH 23/39] md17 --- scripts/md17/generate.py | 11 +++ scripts/md17/{run.sh => generate.sh} | 0 scripts/md17/run.py | 132 +++++++++++++++++++++++++-- 3 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 scripts/md17/generate.py rename scripts/md17/{run.sh => generate.sh} (100%) diff --git a/scripts/md17/generate.py b/scripts/md17/generate.py new file mode 100644 index 00000000..0bbc86b9 --- /dev/null +++ b/scripts/md17/generate.py @@ -0,0 +1,11 @@ +import espaloma as esp + + +def run(name): + g = esp.data.md17_utils.get_molecule(name) + g.save(name) + +if __name__ == "__main__": + import sys + run(sys.argv[1]) + diff --git a/scripts/md17/run.sh b/scripts/md17/generate.sh similarity index 100% rename from scripts/md17/run.sh rename to scripts/md17/generate.sh diff --git a/scripts/md17/run.py b/scripts/md17/run.py index 0bbc86b9..f08538c0 100644 --- a/scripts/md17/run.py +++ b/scripts/md17/run.py @@ -1,11 +1,131 @@ +import torch import espaloma as esp +import copy + +def run(args): + g = esp.Graph.load(args.name) + n_snapshot = g.nodes['g'].data['u_ref'].shape[-1] + + idxs = list(range(n_snapshot)) + import random + random.shuffle(idxs) + idxs_tr = idxs[:args.first] + + g_tr = copy.deepcopy(g) + g_tr.nodes['n1'].data['xyz'] = g_tr.nodes['n1'].data['xyz'][:, idxs_tr, :] + g_tr.nodes['g'].data['u_ref'] = g_tr.nodes['g'].data['u_ref'][:, idxs_tr] + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int)][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + print(janossy_config) + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: {'log_coefficients': 2}, + 4: {'k': 6}, + }, + ) + + readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( + in_features=units, config=janossy_config + ) + + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + net = torch.nn.Sequential( + representation, + readout, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4",]), + esp.mm.energy.EnergyInGraph(suffix='_ref', terms=["n2", "n3", "n4",]), + ) + + + torch.nn.init.normal_( + net[1].f_out_2_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_3_to_log_coefficients.bias, + mean=-5, + ) + + + metrics_tr = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(torch.nn.MSELoss(reduction='none'), reduction="mean" +), + between=['u', "u_ref"], + level="g", + ), + ] + + metrics_te = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(esp.metrics.rmse), + between=['u', "u_ref"], + level="g", + ), + ] + + + optimizer = torch.optim.Adam(net.parameters(), args.lr) + + exp = esp.TrainAndTest( + ds_tr=esp.data.GraphDataset([g_tr]), + ds_te=esp.data.GraphDataset([g]), + net=net, + metrics_tr=metrics_tr, + metrics_te=metrics_te, + n_epochs=args.n_epochs, + normalize=esp.data.normalize.NotNormalize, + record_interval=100, + optimizer=optimizer, + device=torch.device('cuda'), + ) -def run(name): - g = esp.data.md17_utils.get_molecule(name) - g.save(name) if __name__ == "__main__": - import sys - run(sys.argv[1]) - + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--mol", type=str, default="benzene") + parser.add_argument("--first", type=int, default=1) + parser.add_argument("--layer", default="SAGEConv", type=str) + parser.add_argument("--lr", type=float, default=1e-4) + parser.add_argument( + "--config", nargs="*", default=[128, "relu", 128, "relu", 128, "relu"], + ) + parser.add_argument( + "--janossy_config", nargs="*", default=[32, "leaky_relu"], + ) + args = parser.parse_args() + run(args) From ab13b4d58201e31426c49cab9b0d0d0cd01f71c7 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Thu, 8 Apr 2021 20:29:08 -0400 Subject: [PATCH 24/39] md17 submission --- scripts/md17/run.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 scripts/md17/run.sh diff --git a/scripts/md17/run.sh b/scripts/md17/run.sh new file mode 100644 index 00000000..6dbe2b8d --- /dev/null +++ b/scripts/md17/run.sh @@ -0,0 +1,8 @@ +for name in benzene +do + for first in 1 10 100 1000 10000 25000 50000 100000 + do + bsub -q gpuqueue -o %J.stdout -gpu "num=1:j_exclusive=yes" -R "rusage[mem=10] span[ptile=1]" -W 8:00 -n 1\ + python run.py --name $name --first $first + done +done From 5de8fc38adc382aff13f95e8059494dd36b01fcc Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Sat, 10 Apr 2021 01:47:40 -0400 Subject: [PATCH 25/39] md17 scripts --- espaloma/data/collection.py | 15 ++- espaloma/data/md.py | 143 +++++++++++++++++++- scripts/md17/run.py | 46 ++++++- scripts/md17/run.sh | 11 +- scripts/md17/test.py | 133 ++++++++++++++++++ scripts/qca_qm/run.py | 262 ++++++++++++++++++++++++++++++++++++ scripts/qca_qm/run.sh | 19 +++ 7 files changed, 610 insertions(+), 19 deletions(-) create mode 100644 scripts/md17/test.py create mode 100644 scripts/qca_qm/run.py create mode 100644 scripts/qca_qm/run.sh diff --git a/espaloma/data/collection.py b/espaloma/data/collection.py index 1a9eea31..c66f36da 100644 --- a/espaloma/data/collection.py +++ b/espaloma/data/collection.py @@ -26,12 +26,13 @@ def alkethoh(*args, **kwargs): df = pd.concat( [ - pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_rings.smi"), - pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_chain.smi"), + pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_rings.smi", header=None), + pd.read_csv("https://raw.githubusercontent.com/openforcefield/open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/AlkEthOH_chain.smi", header=None), ], + axis=0, ) - smiles = df.iloc[:, 0] + smiles = df.iloc[:, 0].values return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) @@ -78,13 +79,13 @@ def md17_old(*args, **kwargs): return [ esp.data.md17_utils.get_molecule( name, *args, **kwargs - ).heterograph for name in [ - # 'benzene', + ) for name in [ + 'benzene', 'uracil', 'naphthalene', 'aspirin', 'salicylic', 'malonaldehyde', - # 'ethanol', + 'ethanol', 'toluene', 'paracetamol', 'azobenzene' ]] @@ -94,7 +95,7 @@ def md17_new(*args, **kwargs): esp.data.md17_utils.get_molecule( name, *args, **kwargs ).heterograph for name in [ - # 'paracetamol', 'azobenzene', + 'paracetamol', 'azobenzene', 'benzene', 'ethanol', ]] diff --git a/espaloma/data/md.py b/espaloma/data/md.py index 6ee51d4c..409ba642 100644 --- a/espaloma/data/md.py +++ b/espaloma/data/md.py @@ -64,6 +64,9 @@ def subtract_nonbonded_force( id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) + force.updateParametersInContext(simulation.context) + + elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) @@ -71,6 +74,9 @@ def subtract_nonbonded_force( idx, id1, id2, length, 0.0, ) + force.updateParametersInContext(simulation.context) + + elif "Torsion" in name: for idx in range(force.getNumTorsions()): ( @@ -86,7 +92,8 @@ def subtract_nonbonded_force( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) - force.updateParametersInContext(simulation.context) + force.updateParametersInContext(simulation.context) + # the snapshots xs = ( @@ -140,6 +147,140 @@ def subtract_nonbonded_force( return g +def subtract_nonbonded_force_except_14( + g, forcefield="gaff-1.81", +): + + # parameterize topology + topology = g.mol.to_topology().to_openmm() + + generator = SystemGenerator( + small_molecule_forcefield=forcefield, + molecules=[g.mol], + ) + + # create openmm system + system = generator.create_system( + topology, + ) + + # use langevin integrator, although it's not super useful here + integrator = openmm.LangevinIntegrator( + TEMPERATURE, COLLISION_RATE, STEP_SIZE + ) + + # create simulation + simulation = Simulation( + topology=topology, system=system, integrator=integrator + ) + + # get forces + forces = list(system.getForces()) + + # loop through forces + for force in forces: + name = force.__class__.__name__ + + # turn off angle + if "Angle" in name: + for idx in range(force.getNumAngles()): + id1, id2, id3, angle, k = force.getAngleParameters(idx) + force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) + + force.updateParametersInContext(simulation.context) + + + elif "Bond" in name: + for idx in range(force.getNumBonds()): + id1, id2, length, k = force.getBondParameters(idx) + force.setBondParameters( + idx, id1, id2, length, 0.0, + ) + + force.updateParametersInContext(simulation.context) + + + elif "Torsion" in name: + for idx in range(force.getNumTorsions()): + ( + id1, + id2, + id3, + id4, + periodicity, + phase, + k, + ) = force.getTorsionParameters(idx) + force.setTorsionParameters( + idx, id1, id2, id3, id4, periodicity, phase, 0.0, + ) + + force.updateParametersInContext(simulation.context) + + + elif "Nonbonded" in name: + for exception_index in range(force.getNumExceptions()): + p1, p2, chargeprod, sigma, epsilon = force.getExceptionParameters(exception_index) + force.setExceptionParameters(exception_index, p1, p2, chargeprod, sigma, 1e-8*epsilon) + + + force.updateParametersInContext(simulation.context) + + + + # the snapshots + xs = ( + Quantity( + g.nodes["n1"].data["xyz"].detach().numpy(), + esp.units.DISTANCE_UNIT, + ) + .value_in_unit(unit.nanometer) + .transpose((1, 0, 2)) + ) + + # loop through the snapshots + energies = [] + derivatives = [] + + for x in xs: + simulation.context.setPositions(x) + + state = simulation.context.getState( + getEnergy=True, getParameters=True, getForces=True, + ) + + energy = state.getPotentialEnergy().value_in_unit( + esp.units.ENERGY_UNIT, + ) + + derivative = state.getForces(asNumpy=True).value_in_unit( + esp.units.FORCE_UNIT, + ) + + energies.append(energy) + derivatives.append(derivative) + + # put energies to a tensor + energies = torch.tensor( + energies, dtype=torch.get_default_dtype(), + ).flatten()[None, :] + derivatives = torch.tensor( + np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), + ) + + # subtract the energies + g.heterograph.apply_nodes( + lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", + ) + + g.heterograph.apply_nodes( + lambda node: {"u_ref_prime": node.data["u_ref_prime"] - derivatives}, + ntype="n1", + ) + + return g + + # ============================================================================= # MODULE CLASSES diff --git a/scripts/md17/run.py b/scripts/md17/run.py index f08538c0..fe7f99df 100644 --- a/scripts/md17/run.py +++ b/scripts/md17/run.py @@ -1,6 +1,7 @@ import torch import espaloma as esp import copy +import numpy as np def run(args): g = esp.Graph.load(args.name) @@ -10,11 +11,17 @@ def run(args): import random random.shuffle(idxs) idxs_tr = idxs[:args.first] + idxs_te = idxs[-10000:] + g_tr = copy.deepcopy(g) g_tr.nodes['n1'].data['xyz'] = g_tr.nodes['n1'].data['xyz'][:, idxs_tr, :] g_tr.nodes['g'].data['u_ref'] = g_tr.nodes['g'].data['u_ref'][:, idxs_tr] - + + g_te = copy.deepcopy(g) + g_te.nodes['n1'].data['xyz'] = g_te.nodes['n1'].data['xyz'][:, idxs_te, :] + g_te.nodes['g'].data['u_ref'] = g_te.nodes['g'].data['u_ref'][:, idxs_te] + # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) @@ -60,10 +67,10 @@ def forward(self, g): net = torch.nn.Sequential( representation, readout, + readout_improper, ExpCoeff(), esp.mm.geometry.GeometryInGraph(), - esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4",]), - esp.mm.energy.EnergyInGraph(suffix='_ref', terms=["n2", "n3", "n4",]), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper", ]), ) @@ -99,8 +106,8 @@ def forward(self, g): optimizer = torch.optim.Adam(net.parameters(), args.lr) exp = esp.TrainAndTest( - ds_tr=esp.data.GraphDataset([g_tr]), - ds_te=esp.data.GraphDataset([g]), + ds_tr=esp.data.dataset.GraphDataset([g_tr]).view(batch_size=1), + ds_te=esp.data.dataset.GraphDataset([g_te]).view(batch_size=1), net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, @@ -112,12 +119,31 @@ def forward(self, g): ) + results = exp.run() + + fuck + + print(esp.app.report.markdown(results), flush=True) + + curves = esp.app.report.curve(results) + + import os + os.mkdir(args.out) + for spec, curve in curves.items(): + np.save(args.out + "/" + "_".join(spec) + ".npy", curve) + + net = net.cpu() + + import os + torch.save(net.state_dict(), args.out + "/net.th") + + if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() - parser.add_argument("--mol", type=str, default="benzene") + parser.add_argument("--name", type=str, default="benzene") parser.add_argument("--first", type=int, default=1) parser.add_argument("--layer", default="SAGEConv", type=str) parser.add_argument("--lr", type=float, default=1e-4) @@ -125,7 +151,13 @@ def forward(self, g): "--config", nargs="*", default=[128, "relu", 128, "relu", 128, "relu"], ) parser.add_argument( - "--janossy_config", nargs="*", default=[32, "leaky_relu"], + "--janossy_config", nargs="*", default=[128, "relu", 128, "relu", 128, "relu"], + ) + parser.add_argument( + "--n_epochs", type=int, default=10, + ) + parser.add_argument( + "--out", type=str, default="results", ) args = parser.parse_args() run(args) diff --git a/scripts/md17/run.sh b/scripts/md17/run.sh index 6dbe2b8d..4dcdb533 100644 --- a/scripts/md17/run.sh +++ b/scripts/md17/run.sh @@ -1,8 +1,11 @@ -for name in benzene +for name in malonaldehyde # benzene toluene malonaldehyde salicylic aspirin ethanol uracil do - for first in 1 10 100 1000 10000 25000 50000 100000 + for first in 50000 # 1 10 100 1000 10000 25000 50000 100000 do - bsub -q gpuqueue -o %J.stdout -gpu "num=1:j_exclusive=yes" -R "rusage[mem=10] span[ptile=1]" -W 8:00 -n 1\ - python run.py --name $name --first $first + for repeat in 0 # 1 2 3 4 + do + bsub -q gpuqueue -o %J.stdout -gpu "num=1:j_exclusive=yes" -R V100 -R "rusage[mem=10] span[ptile=1]" -W 12:00 -n 1\ + python run.py --name $name --first $first --n_epoch 3000 --out "_"$name"_"$first"_"$repeat + done done done diff --git a/scripts/md17/test.py b/scripts/md17/test.py new file mode 100644 index 00000000..c34f907c --- /dev/null +++ b/scripts/md17/test.py @@ -0,0 +1,133 @@ +import torch +import espaloma as esp +import copy +import numpy as np + +def number(args, train_name, test_name): + g_te = esp.Graph.load(test_name) + n_snapshot = g_te.nodes['g'].data['u_ref'].shape[-1] + + idxs = list(range(n_snapshot)) + import random + random.shuffle(idxs) + idxs_te = idxs[-1000:] + g_te.nodes['n1'].data['xyz'] = g_te.nodes['n1'].data['xyz'][:, idxs_te, :] + g_te.nodes['g'].data['u_ref'] = g_te.nodes['g'].data['u_ref'][:, idxs_te] + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int)][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: {'log_coefficients': 2}, + 4: {'k': 6}, + }, + ) + + readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( + in_features=units, config=janossy_config + ) + + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + net = torch.nn.Sequential( + representation, + readout, + readout_improper, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper", ]), + ).cuda() + + net.load_state_dict( + torch.load( + "_%s_1000_0/net.th" % train_name, + # map_location="cpu", + ) + ) + + g_te.heterograph = g_te.heterograph.to("cuda") + + net(g_te.heterograph) + + return "%.2f" % esp.metrics.rmse( + 627.5 * (g_te.nodes['g'].data['u_ref'] - g_te.nodes['g'].data['u_ref'].mean()), + 627.5 * (g_te.nodes['g'].data['u'] - g_te.nodes['g'].data['u'].mean()), + ) + + +def run(args): + mols = [ + "benzene", + "toluene", + "malonaldehyde", + "salicylic", + "aspirin", + "ethanol", + "uracil", + "naphthalene", + ] + + + import pandas as pd + + df = pd.DataFrame(columns=mols) + + + for mol0 in mols: + for mol1 in mols: + x = number(args, mol0, mol1) + print(mol0, mol1, x) + df[mol0][mol1] = number(args, mol0, mol1) + + print(df) + + +if __name__ == "__main__": + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--test_name", type=str, default="benzene") + parser.add_argument("--train_name", type=str, default="benzene") + parser.add_argument("--first", type=int, default=1) + parser.add_argument("--layer", default="SAGEConv", type=str) + parser.add_argument("--lr", type=float, default=1e-4) + parser.add_argument( + "--config", nargs="*", default=[128, "relu", 128, "relu", 128, "relu"], + ) + parser.add_argument( + "--janossy_config", nargs="*", default=[128, "relu", 128, "relu", 128, "relu"], + ) + parser.add_argument( + "--n_epochs", type=int, default=10, + ) + parser.add_argument( + "--out", type=str, default="results", + ) + args = parser.parse_args() + run(args) diff --git a/scripts/qca_qm/run.py b/scripts/qca_qm/run.py new file mode 100644 index 00000000..0f4370f2 --- /dev/null +++ b/scripts/qca_qm/run.py @@ -0,0 +1,262 @@ +# ============================================================================= +# IMPORTS +# ============================================================================= +import argparse +import os +import math +import numpy as np +import torch +import dgl + +import espaloma as esp + +def run(args): + ''' + # define data + ds_tr = esp.data.qca.pfizer() + ds_vl = esp.data.qca.coverage() + print(len(ds_tr)) + print(len(ds_vl)) + print("loaded", flush=True) + + # get force field + forcefield = esp.graphs.legacy_force_field.LegacyForceField( + args.forcefield + ) + + # param / typing + operation = forcefield.parametrize + + # apply to dataset + ds_tr = ds_tr.apply(operation, in_place=True) + ds_vl = ds_vl.apply(operation, in_place=True) + print("parametrized", flush=True) + + # apply simulation + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation( + n_samples=500, n_steps_per_sample=100, + ) + print("simulated", flush=True) + + ds_tr = ds_tr.apply(simulation.run, in_place=True) + ds_vl = ds_vl.apply(simulation.run, in_place=True) + + ds_tr.save('pfizer') + ds_vl.save('coverage') + ''' + + pfizer = esp.data.dataset.GraphDataset.load('pfizer') + coverage = esp.data.dataset.GraphDataset.load('coverage') + emolecules = esp.data.dataset.GraphDataset.load('emolecules') + bayer = esp.data.dataset.GraphDataset.load('bayer') + roche = esp.data.dataset.GraphDataset.load('roche') + fda = esp.data.dataset.GraphDataset.load('fda') + # benchmark = esp.data.dataset.GraphDataset.load("benchmark") + + + _ds_tr = pfizer + coverage + emolecules + bayer + roche + _ds_vl = fda # benchmark + + print(len(_ds_tr)) + print(len(_ds_vl)) + + # batch + ds_tr = _ds_tr.view("graph", batch_size=100, shuffle=False) + ds_vl = _ds_vl.view("graph", batch_size=100) + + g = next(iter(ds_tr)) + esp.mm.geometry.geometry_in_graph(g) + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int) or (isinstance(x, str) and x.isdigit())][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: { + 'log_coefficients': 2, + }, + 4: { + 'k': 6, + }, + }, + ) + + readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( + in_features=units, config=janossy_config + ) + + readout_14 = esp.nn.readout.janossy.JanossyPooling14( + in_features=units, config=janossy_config, + ) + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + g.nodes['n2'].data['coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + return g + + net = torch.nn.Sequential( + representation, + readout, + readout_improper, + # readout_14, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"]), + ) + + + torch.nn.init.normal_( + net[1].f_out_2_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_3_to_log_coefficients.bias, + mean=-5, + ) + + + # net = net.cuda() + metrics_tr = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(torch.nn.MSELoss(reduction='none'), reduction="mean"), + between=['u', "u_ref"], + level="g", + ), + ] + + metrics_te = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(esp.metrics.rmse), + between=['u', "u_ref"], + level="g", + ), + ] + + + optimizer = torch.optim.Adam(net.parameters(), args.lr) + + exp = esp.TrainAndTest( + ds_tr=ds_tr, + ds_te=ds_vl, + net=net, + metrics_tr=metrics_tr, + metrics_te=metrics_te, + n_epochs=args.n_epochs, + normalize=esp.data.normalize.NotNormalize, + record_interval=100, + optimizer=optimizer, + device=torch.device('cuda'), + ) + + results = exp.run() + + print(esp.app.report.markdown(results), flush=True) + + curves = esp.app.report.curve(results) + + import os + os.mkdir(args.out) + for spec, curve in curves.items(): + np.save(args.out + "/" + "_".join(spec) + ".npy", curve) + + net = net.cpu() + + import pandas as pd + df = pd.DataFrame(columns=["SMILES", "RMSE", "n_snapshots"]) + import os + torch.save(net.state_dict(), args.out + "/net.th") + + + + + for state_name, state in exp.states.items(): + torch.save(state, args.out + "/net%s.th" % state_name) + + + + + + ''' + for g in _ds_tr: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_tr_%s.csv" % args.first) + + for g in _ds_vl: + net(g.heterograph) + + _df = { + 'SMILES': g.mol.to_smiles(), + 'RMSE': 625 * esp.metrics.center(esp.metrics.rmse)(g.nodes['g'].data['u_ref'], g.nodes['g'].data['u']).cpu().detach().numpy().item(), + 'n_snapshots': g.nodes['n1'].data['xyz'].shape[1] + } + + df = df.append(_df, ignore_index=True) + + df.to_csv(args.out + "/inspect_vl_%s.csv" % args.first) + ''' +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--forcefield", default="gaff-1.81", type=str) + parser.add_argument("--layer", default="GraphConv", type=str) + parser.add_argument("--n_classes", default=100, type=int) + parser.add_argument( + "--config", nargs="*", default=[128, "leaky_relu", 128, "leaky_relu", 128, "leaky_relu"], + ) + + parser.add_argument( + "--training_metrics", nargs="*", default=["TypingCrossEntropy"] + ) + parser.add_argument( + "--test_metrics", nargs="*", default=["TypingAccuracy"] + ) + parser.add_argument( + "--out", default="results", type=str + ) + parser.add_argument("--janossy_config", nargs="*", default=[32, "leaky_relu"]) + + parser.add_argument("--graph_act", type=str, default="leaky_relu") + + parser.add_argument("--n_epochs", default=10, type=int) + + parser.add_argument("--weight", default=1.0, type=float) + + parser.add_argument("--lr", default=1e-5, type=float) + parser.add_argument("--batch_size", default=32, type=float) + parser.add_argument("--first", default=32, type=int) + args = parser.parse_args() + + run(args) diff --git a/scripts/qca_qm/run.sh b/scripts/qca_qm/run.sh new file mode 100644 index 00000000..1de2c2d3 --- /dev/null +++ b/scripts/qca_qm/run.sh @@ -0,0 +1,19 @@ +#BSUB -q gpuqueue +#BSUB -o %J.stdout +#BSUB -gpu "num=1:j_exclusive=yes" +#BSUB -R "rusage[mem=30] span[ptile=1]" +#BSUB -W 24:00 +#BSUB -n 1 + +n_epochs=5000 +layer="SAGEConv" +units=128 +act="relu" +lr=1e-4 +graph_act="relu" +weight=1.0 + +python run.py --n_epochs $n_epochs --layer $layer --config $units $act $units $act $units $act --janossy_config $units $act $units $act $units $act --weight $weight --out $units"_"$layer"_"$act"_"$weight"_"$lr"_"$n_epochs"_"$small_batch"_"$big_batch"_single_gpu_janossy_first_distributed" --lr $lr --graph_act $act + + + From 78fafdb64020f568a59808d0908284d892df06cc Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Sat, 10 Apr 2021 01:48:10 -0400 Subject: [PATCH 26/39] energy --- espaloma/mm/energy.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 9a11b5db..3844ad93 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -141,7 +141,7 @@ def apply_nonbonded(nodes, scaling=1.0, suffix=""): return { "u%s" % suffix: scaling - * esp.mm.nonbonded.lj_9_6( + * esp.mm.nonbonded.lj_12_6( x=nodes.data["x"], sigma=nodes.data["sigma%s" % suffix], epsilon=nodes.data["epsilon%s" % suffix], @@ -175,10 +175,6 @@ def energy_in_graph( # TODO: this is all very restricted for now # we need to make this better - if "nonbonded" in terms or "onefour" in terms: - # apply combination rule - esp.mm.nonbonded.lorentz_berthelot(g, suffix=suffix) - if "n2" in terms: # apply energy function From 1c7888c71675f7d31ae4d6f5054b337d58142070 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 13 Apr 2021 10:40:46 -0400 Subject: [PATCH 27/39] minimum fitting --- scripts/mm_alkethoh_torsion_min/run.py | 241 +++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 scripts/mm_alkethoh_torsion_min/run.py diff --git a/scripts/mm_alkethoh_torsion_min/run.py b/scripts/mm_alkethoh_torsion_min/run.py new file mode 100644 index 00000000..0adf3235 --- /dev/null +++ b/scripts/mm_alkethoh_torsion_min/run.py @@ -0,0 +1,241 @@ +# ============================================================================= +# IMPORTS +# ============================================================================= +import argparse +import os + +import numpy as np +import torch + +import espaloma as esp + +def run(args): + ''' + # define data + ds_tr = esp.data.qca.pfizer() + ds_vl = esp.data.qca.coverage() + print(len(ds_tr)) + print(len(ds_vl)) + print("loaded", flush=True) + + # get force field + forcefield = esp.graphs.legacy_force_field.LegacyForceField( + args.forcefield + ) + + # param / typing + operation = forcefield.parametrize + + # apply to dataset + ds_tr = ds_tr.apply(operation, in_place=True) + ds_vl = ds_vl.apply(operation, in_place=True) + print("parametrized", flush=True) + + # apply simulation + # make simulation + from espaloma.data.md import MoleculeVacuumSimulation + simulation = MoleculeVacuumSimulation( + n_samples=500, n_steps_per_sample=100, + ) + print("simulated", flush=True) + + ds_tr = ds_tr.apply(simulation.run, in_place=True) + ds_vl = ds_vl.apply(simulation.run, in_place=True) + + ds_tr.save('pfizer') + ds_vl.save('coverage') + ''' + + ds = esp.data.dataset.GraphDataset.load("alkethoh") + ds.shuffle(seed=2666) + ds_tr, ds_vl, ds_te = ds.split([8, 1, 1]) + + # batch + ds_tr = ds_tr.view("graph", batch_size=100, shuffle=True) + ds_vl = ds_vl.view("graph", batch_size=100, shuffle=True) + ds_te = ds_te.view("graph", batch_size=100, shuffle=True) + + # layer + layer = esp.nn.layers.dgl_legacy.gn(args.layer) + + # representation + representation = esp.nn.Sequential(layer, config=args.config) + + # get the last bit of units + units = [int(x) for x in args.config if isinstance(x, int) or (isinstance(x, str) and x.isdigit())][-1] + + janossy_config = [] + for x in args.janossy_config: + if isinstance(x, int): + janossy_config.append(int(x)) + + elif x.isdigit(): + janossy_config.append(int(x)) + + else: + janossy_config.append(x) + + readout = esp.nn.readout.janossy.JanossyPooling( + in_features=units, config=janossy_config, + out_features={ + 2: {'log_coefficients': 2}, + 3: {'log_coefficients': 2}, + 4: {'k': 6} + }, + ) + + class ExpCoeff(torch.nn.Module): + def forward(self, g): + import math + g.nodes['n2'].data['_coefficients'] = g.nodes['n2'].data['log_coefficients'].exp() + g.nodes['n3'].data['_coefficients'] = g.nodes['n3'].data['log_coefficients'].exp() + + g.nodes['n2'].data['k'], g.nodes['n2'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n2'].data['_coefficients'][:, 0][:, None], + g.nodes['n2'].data['_coefficients'][:, 1][:, None], + 1.5, 6.0 + ) + + g.nodes['n3'].data['k'], g.nodes['n3'].data['eq'] = esp.mm.functional.linear_mixture_to_original( + g.nodes['n3'].data['_coefficients'][:, 0][:, None], + g.nodes['n3'].data['_coefficients'][:, 1][:, None], + 0.0, math.pi, + ) + + return g + + + net = torch.nn.Sequential( + representation, + readout, + ExpCoeff(), + esp.mm.geometry.GeometryInGraph(), + esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4"]), + esp.mm.energy.EnergyInGraph(suffix='_ref', terms=["n2", "n3", "n4"]), + ) + + torch.nn.init.normal_( + net[1].f_out_2_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_3_to_log_coefficients.bias, + mean=-5, + ) + + torch.nn.init.normal_( + net[1].f_out_4_to_k.bias, + std=1e-3, + ) + + torch.nn.init.normal_( + net[1].f_out_4_to_k.weight, + std=1e-3, + ) + + net = net.cuda() + + + def rmse_min(input, target): + return torch.nn.MSELoss()( + input.min(dim=-1)[0], + target.min(dim=-1)[0], + ) + + metrics_tr = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(torch.nn.MSELoss(reduction='none'), reduction="mean"), + between=['u', "u_ref"], + level="g", + ), + + esp.metrics.GraphMetric( + base_metric=rmse_min, + between=['u', 'u_ref'], + level='g', + ), + ] + + metrics_te = [ + esp.metrics.GraphMetric( + base_metric=esp.metrics.center(esp.metrics.rmse), + between=['u', "u_ref"], + level="g", + ), + esp.metrics.GraphMetric( + base_metric=esp.metrics.rmse, + between=['u', "u_ref"], + level="g", + ), + ] + + + optimizer = torch.optim.Adam(net.parameters(), args.lr) + + exp = esp.TrainAndTest( + ds_tr=ds_tr, + ds_te=ds_te, + ds_vl=ds_vl, + net=net, + metrics_tr=metrics_tr, + metrics_te=metrics_te, + n_epochs=args.n_epochs, + normalize=esp.data.normalize.PositiveNotNormalize, + record_interval=100, + optimizer=optimizer, + device=torch.device('cuda:0'), + ) + + results = exp.run() + + print(esp.app.report.markdown(results)) + + + curves = esp.app.report.curve(results) + + import os + os.mkdir(args.out) + for spec, curve in curves.items(): + np.save(args.out + "/" + "_".join(spec) + ".npy", curve) + + torch.save(net.state_dict(), args.out + "/net.th") + + for idx, state in exp.states.items(): + torch.save(state, args.out + "/net%s.th" % idx) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--forcefield", default="gaff-1.81", type=str) + parser.add_argument("--layer", default="GraphConv", type=str) + parser.add_argument("--n_classes", default=100, type=int) + parser.add_argument( + "--config", nargs="*", default=[128, "leaky_relu", 128, "leaky_relu", 128, "leaky_relu"], + ) + + parser.add_argument( + "--training_metrics", nargs="*", default=["TypingCrossEntropy"] + ) + parser.add_argument( + "--test_metrics", nargs="*", default=["TypingAccuracy"] + ) + parser.add_argument( + "--out", default="results", type=str + ) + parser.add_argument("--janossy_config", nargs="*", default=[32, "leaky_relu"]) + + parser.add_argument("--graph_act", type=str, default="leaky_relu") + + parser.add_argument("--n_epochs", default=10, type=int) + + parser.add_argument("--weight", default=1.0, type=float) + + parser.add_argument("--lr", default=1e-5, type=float) + parser.add_argument("--batch_size", default=32, type=float) + parser.add_argument("--first", default=32, type=int) + args = parser.parse_args() + + run(args) + From b77c5af53f6d94d6e94c9c8b62d08ba2e83a654c Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Fri, 16 Apr 2021 16:23:10 -0400 Subject: [PATCH 28/39] c7o2h10 preceiving chemical species --- scripts/data/c7o2h10/run.py | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 scripts/data/c7o2h10/run.py diff --git a/scripts/data/c7o2h10/run.py b/scripts/data/c7o2h10/run.py new file mode 100644 index 00000000..68df92f9 --- /dev/null +++ b/scripts/data/c7o2h10/run.py @@ -0,0 +1,70 @@ +import torch +import os +import numpy as np +import espaloma as esp +from openeye import oechem + + +def run(idx, u_thres=0.1): + # read xyz file + energies = open("c7o2h10_md/%s.energy.dat" % idx, "r").readlines()[1:] + energies = 0.037 * np.array([float(energy) for energy in energies]) + ifs = oechem.oemolistream() + ifs.open("c7o2h10_md/%s.xyz" % idx) + + # get list of molecules + mols = [] + for mol in ifs.GetOEGraphMols(): + mols.append(oechem.OEGraphMol(mol)) + assert len(mols) == len(energies) + + # read smiles + smiles = [oechem.OEMolToSmiles(mol) for mol in mols] + + # find the reference smiles + idx_ref = energies.argmin() + smiles_ref = smiles[idx_ref] + u_ref = energies.min() + + # pick the indices that are low in energy and have the same smiles string + ok_idxs = [_idx for _idx in range(len(smiles)) if smiles[_idx] == smiles_ref] + ok_idxs = [_idx for _idx in ok_idxs if energies[_idx] <= u_ref + u_thres] + + # filter based on indices + ok_mols = [mols[_idx] for _idx in ok_idxs] + ok_energies = [energies[_idx] for _idx in ok_idxs] + + # put coordinates in the graph + xs = [mol.GetCoords() for mol in ok_mols] + xs = torch.stack( + [torch.tensor([x[_idx] for _idx in range(len(x))]) for x in xs], + dim=1, + ) + + us = torch.tensor(ok_energies)[None, :] + + from openforcefield.topology import Molecule + g = esp.Graph(Molecule.from_openeye(mols[idx_ref], allow_undefined_stereo=True)) + + + print(g.mol.to_smiles()) + g.nodes['n1'].data['xyz'] = xs + g.nodes['g'].data['u_ref'] = us + + print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) + + + g = esp.data.md.subtract_nonbonded_force(g, "openff-1.2.0") + + print(g.nodes['n1'].data['xyz'].shape) + + + print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) + + g.save("out/%s" % idx) + +if __name__ == "__main__": + import sys + run(sys.argv[1]) + + From 34dc29148232bdc8582b6d70f9f7ef120f96c92a Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Fri, 16 Apr 2021 16:27:48 -0400 Subject: [PATCH 29/39] undo run script --- scripts/data/c7o2h10/run.py | 70 ------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 scripts/data/c7o2h10/run.py diff --git a/scripts/data/c7o2h10/run.py b/scripts/data/c7o2h10/run.py deleted file mode 100644 index 68df92f9..00000000 --- a/scripts/data/c7o2h10/run.py +++ /dev/null @@ -1,70 +0,0 @@ -import torch -import os -import numpy as np -import espaloma as esp -from openeye import oechem - - -def run(idx, u_thres=0.1): - # read xyz file - energies = open("c7o2h10_md/%s.energy.dat" % idx, "r").readlines()[1:] - energies = 0.037 * np.array([float(energy) for energy in energies]) - ifs = oechem.oemolistream() - ifs.open("c7o2h10_md/%s.xyz" % idx) - - # get list of molecules - mols = [] - for mol in ifs.GetOEGraphMols(): - mols.append(oechem.OEGraphMol(mol)) - assert len(mols) == len(energies) - - # read smiles - smiles = [oechem.OEMolToSmiles(mol) for mol in mols] - - # find the reference smiles - idx_ref = energies.argmin() - smiles_ref = smiles[idx_ref] - u_ref = energies.min() - - # pick the indices that are low in energy and have the same smiles string - ok_idxs = [_idx for _idx in range(len(smiles)) if smiles[_idx] == smiles_ref] - ok_idxs = [_idx for _idx in ok_idxs if energies[_idx] <= u_ref + u_thres] - - # filter based on indices - ok_mols = [mols[_idx] for _idx in ok_idxs] - ok_energies = [energies[_idx] for _idx in ok_idxs] - - # put coordinates in the graph - xs = [mol.GetCoords() for mol in ok_mols] - xs = torch.stack( - [torch.tensor([x[_idx] for _idx in range(len(x))]) for x in xs], - dim=1, - ) - - us = torch.tensor(ok_energies)[None, :] - - from openforcefield.topology import Molecule - g = esp.Graph(Molecule.from_openeye(mols[idx_ref], allow_undefined_stereo=True)) - - - print(g.mol.to_smiles()) - g.nodes['n1'].data['xyz'] = xs - g.nodes['g'].data['u_ref'] = us - - print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) - - - g = esp.data.md.subtract_nonbonded_force(g, "openff-1.2.0") - - print(g.nodes['n1'].data['xyz'].shape) - - - print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) - - g.save("out/%s" % idx) - -if __name__ == "__main__": - import sys - run(sys.argv[1]) - - From 580c20296cf7f8b8238158cd706912fa38946481 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Fri, 16 Apr 2021 16:29:10 -0400 Subject: [PATCH 30/39] grab c7o2h10 --- scripts/data/c7o2h10/grab.py | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 scripts/data/c7o2h10/grab.py diff --git a/scripts/data/c7o2h10/grab.py b/scripts/data/c7o2h10/grab.py new file mode 100644 index 00000000..68df92f9 --- /dev/null +++ b/scripts/data/c7o2h10/grab.py @@ -0,0 +1,70 @@ +import torch +import os +import numpy as np +import espaloma as esp +from openeye import oechem + + +def run(idx, u_thres=0.1): + # read xyz file + energies = open("c7o2h10_md/%s.energy.dat" % idx, "r").readlines()[1:] + energies = 0.037 * np.array([float(energy) for energy in energies]) + ifs = oechem.oemolistream() + ifs.open("c7o2h10_md/%s.xyz" % idx) + + # get list of molecules + mols = [] + for mol in ifs.GetOEGraphMols(): + mols.append(oechem.OEGraphMol(mol)) + assert len(mols) == len(energies) + + # read smiles + smiles = [oechem.OEMolToSmiles(mol) for mol in mols] + + # find the reference smiles + idx_ref = energies.argmin() + smiles_ref = smiles[idx_ref] + u_ref = energies.min() + + # pick the indices that are low in energy and have the same smiles string + ok_idxs = [_idx for _idx in range(len(smiles)) if smiles[_idx] == smiles_ref] + ok_idxs = [_idx for _idx in ok_idxs if energies[_idx] <= u_ref + u_thres] + + # filter based on indices + ok_mols = [mols[_idx] for _idx in ok_idxs] + ok_energies = [energies[_idx] for _idx in ok_idxs] + + # put coordinates in the graph + xs = [mol.GetCoords() for mol in ok_mols] + xs = torch.stack( + [torch.tensor([x[_idx] for _idx in range(len(x))]) for x in xs], + dim=1, + ) + + us = torch.tensor(ok_energies)[None, :] + + from openforcefield.topology import Molecule + g = esp.Graph(Molecule.from_openeye(mols[idx_ref], allow_undefined_stereo=True)) + + + print(g.mol.to_smiles()) + g.nodes['n1'].data['xyz'] = xs + g.nodes['g'].data['u_ref'] = us + + print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) + + + g = esp.data.md.subtract_nonbonded_force(g, "openff-1.2.0") + + print(g.nodes['n1'].data['xyz'].shape) + + + print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) + + g.save("out/%s" % idx) + +if __name__ == "__main__": + import sys + run(sys.argv[1]) + + From 9ab3793a5325cb8ce75ef4d06c95ab4eb4a47a5f Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Sun, 18 Apr 2021 14:46:52 -0400 Subject: [PATCH 31/39] subtract --- espaloma/data/md.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/espaloma/data/md.py b/espaloma/data/md.py index 7b879bfb..90a87190 100644 --- a/espaloma/data/md.py +++ b/espaloma/data/md.py @@ -141,10 +141,11 @@ def subtract_nonbonded_force( lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", ) - g.heterograph.apply_nodes( - lambda node: {"u_ref_prime": node.data["u_ref_prime"] - derivatives}, - ntype="n1", - ) + if "u_ref_prime" in g.nodes['n1']: + g.heterograph.apply_nodes( + lambda node: {"u_ref_prime": node.data["u_ref_prime"] - derivatives}, + ntype="n1", + ) return g From ea1564ca7e2f09bc37a0c09f0621788e6fe6bb02 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Mon, 19 Apr 2021 11:38:02 -0400 Subject: [PATCH 32/39] rename --- scripts/data/{c7o2h10 => iso17}/grab.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/data/{c7o2h10 => iso17}/grab.py (100%) diff --git a/scripts/data/c7o2h10/grab.py b/scripts/data/iso17/grab.py similarity index 100% rename from scripts/data/c7o2h10/grab.py rename to scripts/data/iso17/grab.py From 2ab6dba2066ee910641f6935314a2b36a07ed1c0 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Mon, 19 Apr 2021 12:54:04 -0400 Subject: [PATCH 33/39] Update grab.py --- scripts/data/iso17/grab.py | 40 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/scripts/data/iso17/grab.py b/scripts/data/iso17/grab.py index 68df92f9..c502f84e 100644 --- a/scripts/data/iso17/grab.py +++ b/scripts/data/iso17/grab.py @@ -3,7 +3,8 @@ import numpy as np import espaloma as esp from openeye import oechem - +from simtk import unit +from simtk.unit.quantity import Quantity def run(idx, u_thres=0.1): # read xyz file @@ -17,7 +18,7 @@ def run(idx, u_thres=0.1): for mol in ifs.GetOEGraphMols(): mols.append(oechem.OEGraphMol(mol)) assert len(mols) == len(energies) - + # read smiles smiles = [oechem.OEMolToSmiles(mol) for mol in mols] @@ -34,37 +35,38 @@ def run(idx, u_thres=0.1): ok_mols = [mols[_idx] for _idx in ok_idxs] ok_energies = [energies[_idx] for _idx in ok_idxs] + ofs = oechem.oemolostream() + ofs.open("mol.sdf") + oechem.OEWriteMolecule(ofs, ok_mols[0]) + # put coordinates in the graph xs = [mol.GetCoords() for mol in ok_mols] xs = torch.stack( - [torch.tensor([x[_idx] for _idx in range(len(x))]) for x in xs], + [ + torch.tensor( + [ + Quantity( + x[_idx], + unit.angstrom, + ).value_in_unit( + unit.bohr, + ) + for _idx in range(len(x)) + ] + ) for x in xs], dim=1, ) - + us = torch.tensor(ok_energies)[None, :] from openforcefield.topology import Molecule g = esp.Graph(Molecule.from_openeye(mols[idx_ref], allow_undefined_stereo=True)) - - - print(g.mol.to_smiles()) g.nodes['n1'].data['xyz'] = xs g.nodes['g'].data['u_ref'] = us - - print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) - - g = esp.data.md.subtract_nonbonded_force(g, "openff-1.2.0") - - print(g.nodes['n1'].data['xyz'].shape) - - - print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].mean()) - + print(g.nodes['g'].data['u_ref'] - g.nodes['g'].data['u_ref'].min()) g.save("out/%s" % idx) if __name__ == "__main__": import sys run(sys.argv[1]) - - From 37199262553833ea37f1569138e386b9979e33be Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Mon, 19 Apr 2021 17:55:08 -0400 Subject: [PATCH 34/39] grab --- scripts/data/ani1/grab.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 scripts/data/ani1/grab.py diff --git a/scripts/data/ani1/grab.py b/scripts/data/ani1/grab.py new file mode 100644 index 00000000..90aa41d1 --- /dev/null +++ b/scripts/data/ani1/grab.py @@ -0,0 +1,2 @@ +import espaloma as esp + From c76ac07ea9fadd98cc90701398108d4412de77a8 Mon Sep 17 00:00:00 2001 From: Yuanqing Wang Date: Tue, 20 Apr 2021 13:24:33 -0400 Subject: [PATCH 35/39] grab ani --- espaloma/data/dataset.py | 1 - scripts/data/ani1/grab.py | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/espaloma/data/dataset.py b/espaloma/data/dataset.py index 911483bc..2306abef 100644 --- a/espaloma/data/dataset.py +++ b/espaloma/data/dataset.py @@ -317,4 +317,3 @@ def load(cls, path): paths = [_path for _path in paths if _path.isnumeric()] graphs = [esp.Graph.load(path + "/" + _path) for _path in paths] return cls(graphs) - diff --git a/scripts/data/ani1/grab.py b/scripts/data/ani1/grab.py index 90aa41d1..7b4eb7ef 100644 --- a/scripts/data/ani1/grab.py +++ b/scripts/data/ani1/grab.py @@ -1,2 +1,11 @@ +import h5py import espaloma as esp +def run(path, name): + ds = h5py.File(path)[name] + for key in ds: + mol = ds[key] + +if __name__ == "__main__": + import sys + run(sys.argv[1], sys.argv[2]) From 51e0ca6ce06d7cf320c4403772d5190a71f28de2 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Tue, 20 Apr 2021 13:25:40 -0400 Subject: [PATCH 36/39] dataset backup --- espaloma/data/dataset.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/espaloma/data/dataset.py b/espaloma/data/dataset.py index 911483bc..9eb66430 100644 --- a/espaloma/data/dataset.py +++ b/espaloma/data/dataset.py @@ -314,7 +314,17 @@ def save(self, path): def load(cls, path): import os paths = os.listdir(path) - paths = [_path for _path in paths if _path.isnumeric()] - graphs = [esp.Graph.load(path + "/" + _path) for _path in paths] + paths = [_path for _path in paths] + + graphs = [] + for _path in paths: + try: + graphs.append( + esp.Graph.load(path + "/" + _path) + ) + + except: + pass + return cls(graphs) From 1fe0a06d3a0040f01127711575797db5c7079d13 Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 21 Apr 2021 11:00:47 -0400 Subject: [PATCH 37/39] update --- espaloma/mm/energy.py | 2 +- espaloma/mm/geometry.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index 82798445..cd27840d 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -327,7 +327,7 @@ def energy_in_graph( msg="m_%s" % term, out="u_%s%s" % (term, suffix) ), ) - for term in terms + for term in terms if "u" in g.nodes[term].data }, cross_reducer="sum", ) diff --git a/espaloma/mm/geometry.py b/espaloma/mm/geometry.py index b74406f9..b0c550e8 100644 --- a/espaloma/mm/geometry.py +++ b/espaloma/mm/geometry.py @@ -189,7 +189,7 @@ def geometry_in_graph(g): **{ "n1_as_%s_in_n%s" % (pos_idx, big_idx): ( - copy_src(src="xyz", out="m_xyz%s" % pos_idx), + dgl.function.copy_src(src="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), @@ -200,7 +200,7 @@ def geometry_in_graph(g): **{ "n1_as_%s_in_%s" % (pos_idx, term): ( - copy_src(src="xyz", out="m_xyz%s" % pos_idx), + dgl.function.copy_src(src="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), @@ -211,7 +211,7 @@ def geometry_in_graph(g): **{ "n1_as_%s_in_%s" % (pos_idx, term): ( - copy_src(src="xyz", out="m_xyz%s" % pos_idx), + dgl.function.copy_src(src="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), From 1230a67b8d5d886fdb77256387341aac10675cda Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Wed, 21 Apr 2021 11:46:01 -0400 Subject: [PATCH 38/39] grab --- scripts/data/ani1/grab.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/scripts/data/ani1/grab.py b/scripts/data/ani1/grab.py index 7b4eb7ef..01a10bf6 100644 --- a/scripts/data/ani1/grab.py +++ b/scripts/data/ani1/grab.py @@ -1,11 +1,39 @@ +import torch import h5py import espaloma as esp +import numpy as np + +def run(path, u_thres=0.1): + _ds = h5py.File(path) + for name in _ds.keys(): + ds = _ds[name] + _idx = 0 + for key in ds: + print(_idx) + mol = ds[key] + smiles = mol["smiles"] + smiles = np.array(smiles).tolist() + smiles = [x.decode("UTF-8") for x in smiles] + smiles = "".join(smiles) + + xs = np.array(mol["coordinates"]) + us = np.array(mol["energies"]) + species = list(mol["species"]) + species = [x.decode("UTF-8") for x in species] + + + idxs = list(range(len(xs))) + idx_ref = us.argmin() + ok_idxs = [idx for idx in idxs if us[idx] <= us[idx_ref] + u_thres] + + from espaloma.data.utils import infer_mol_from_coordinates + g = infer_mol_from_coordinates(xs[idx_ref], species, smiles_ref=smiles) + g.nodes['n1'].data['xyz'] = torch.tensor(xs[ok_idxs, :, :]).transpose(1, 0) + g.nodes['g'].data['u_ref'] = torch.tensor(us[None, ok_idxs]) + g.save("ani1/%s%s" % (name, _idx)) + _idx += 1 -def run(path, name): - ds = h5py.File(path)[name] - for key in ds: - mol = ds[key] if __name__ == "__main__": import sys - run(sys.argv[1], sys.argv[2]) + run(sys.argv[1]) From 9304d3f08dce2915dc986a9b82e74bb2ad82d74a Mon Sep 17 00:00:00 2001 From: yuanqing-wang Date: Mon, 26 Apr 2021 15:00:03 -0400 Subject: [PATCH 39/39] energy ii debug --- espaloma/mm/energy.py | 16 ++++++++++++++-- espaloma/mm/tests/test_energy_ii.py | 1 + espaloma/mm/torsion.py | 7 +++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/espaloma/mm/energy.py b/espaloma/mm/energy.py index cd27840d..aaf95bbc 100644 --- a/espaloma/mm/energy.py +++ b/espaloma/mm/energy.py @@ -76,6 +76,15 @@ def apply_angle_ii(nodes, suffix=""): def apply_torsion_ii(nodes, suffix=""): """ Torsion energy in nodes. """ return { + "u_angle_torsion%s" + % suffix: esp.mm.torsion.angle_torsion( + x=nodes.data["x"], + x_angle_left=nodes.data["x_angle_left"], + x_angle_right=nodes.data["x_angle_right"], + eq_angle_left=nodes.data["eq_angle_left"], + eq_angle_right=nodes.data["eq_angle_right"], + k_angle_torsion=nodes.data["k_angle_torsion"], + ), "u_angle_angle%s" % suffix: esp.mm.torsion.angle_angle( x_angle_left=nodes.data["x_angle_left"], @@ -271,7 +280,8 @@ def energy_in_graph( g.apply_nodes( lambda node: { 'u%s' % suffix: - node.data['u_urey_bradley%s' % suffix]\ + node.data['u%s' % suffix]\ + + node.data['u_urey_bradley%s' % suffix]\ + node.data['u_bond_bond%s' % suffix]\ + node.data['u_bond_angle%s' % suffix] }, @@ -291,7 +301,9 @@ def energy_in_graph( g.apply_nodes( lambda node: { 'u%s' % suffix: - node.data['u_angle_angle%s' % suffix]\ + node.data['u%s' % suffix]\ + + node.data['u_angle_torsion%s' % suffix]\ + + node.data['u_angle_angle%s' % suffix]\ + node.data['u_angle_angle_torsion%s' % suffix]\ + node.data['u_bond_torsion%s' % suffix] }, diff --git a/espaloma/mm/tests/test_energy_ii.py b/espaloma/mm/tests/test_energy_ii.py index 4ee926bc..b895e2dc 100644 --- a/espaloma/mm/tests/test_energy_ii.py +++ b/espaloma/mm/tests/test_energy_ii.py @@ -46,6 +46,7 @@ def test_energy(): 4: { 'k': 6, 'k_angle_angle': 1, + 'k_angle_torsion': 1, 'k_angle_angle_torsion': 1, 'k_side_torsion': 1, 'k_center_torsion': 1, diff --git a/espaloma/mm/torsion.py b/espaloma/mm/torsion.py index 0e8616da..19e8af1b 100644 --- a/espaloma/mm/torsion.py +++ b/espaloma/mm/torsion.py @@ -60,20 +60,19 @@ def angle_torsion( eq_angle_left, eq_angle_right, x, - k_angle_torsion_left, - k_angle_torsion_right, + k_angle_torsion, periodicity=list(range(1,3)), ): return esp.mm.functional.harmonic_periodic_coupled( x_harmonic=x_angle_left, x_periodic=x, - k=k_angle_torsion_left, + k=k_angle_torsion, eq=eq_angle_left, periodicity=periodicity, ) + esp.mm.functional.harmonic_periodic_coupled( x_harmonic=x_angle_right, x_periodic=x, - k=k_angle_torsion_right, + k=k_angle_torsion, eq=eq_angle_right, periodicity=periodicity, )