From 19b831a935563d0664433b7fa6ec0a339172adb2 Mon Sep 17 00:00:00 2001
From: federica
Date: Tue, 9 Jul 2024 14:03:59 +0100
Subject: [PATCH 1/9] initial draft for workgraph, pre-commit fail

---
 aiida_mlip/workflows/hts.py      | 114 +++++++++++++++++++++++++++++++
 aiida_mlip/workflows/training.py |  65 ++++++++++++++++++
 pyproject.toml                   |   4 +-
 3 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 aiida_mlip/workflows/hts.py
 create mode 100644 aiida_mlip/workflows/training.py

diff --git a/aiida_mlip/workflows/hts.py b/aiida_mlip/workflows/hts.py
new file mode 100644
index 00000000..13dca328
--- /dev/null
+++ b/aiida_mlip/workflows/hts.py
@@ -0,0 +1,114 @@
+"""Example code for submitting single point calculations."""
+
+import csv
+from pathlib import Path
+import sys
+import time
+
+import click
+
+from aiida.common import NotExistent
+from aiida.engine import run_get_pk, submit
+from aiida.orm import load_code, load_group, load_node
+from aiida.plugins import CalculationFactory
+
+from aiida_mlip.data.config import JanusConfigfile
+from aiida_mlip.data.model import ModelData
+from aiida_mlip.helpers.help_load import load_structure
+
+
+def run_hts(folder, config, calc, output_filename, code, group, launch):
+    # Add the required inputs for aiida
+    metadata = {"options": {"resources": {"num_machines": 1}}}
+
+    # All other parameters are taken from the config file
+    # We want to pass it as an AiiDA data type for the provenance
+    conf = JanusConfigfile(config)
+    # Define calculation to run
+    Calculation = CalculationFactory(f"mlip.{calc}")
+    model = ModelData.download(
+        url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model",
+        cache_dir="models",
+        architecture="mace_mp",
+        filename="small.model",
+    )
+    list_of_nodes = []
+    p = Path(folder)
+    for child in p.glob("**/*"):
+        if child.name.endswith("cif"):
+            print(child.name)
+            metadata["label"] = f"{child.name}"
+            # This structure will overwrite the one in the config file if present
+            structure = load_structure(child.absolute())
+            # Run calculation
+            if launch == "run_get_pk":
+                result, pk = run_get_pk(
+                    Calculation,
+                    code=code,
+                    struct=structure,
+                    metadata=metadata,
+                    config=conf,
+                    model=model,
+                )
+                list_of_nodes.append(pk)
+
+                group.add_nodes(load_node(pk))
+                time.sleep(1)
+                print(f"Printing results from calculation: {result}")
+
+            if launch == "submit":
+                result = submit(
+                    Calculation,
+                    code=code,
+                    struct=structure,
+                    metadata=metadata,
+                    config=conf,
+                    model=model,
+                )
+                list_of_nodes.append(result.pk)
+
+                group.add_nodes(load_node(result.pk))
+
+                print(f"Printing results from calculation: {result}")
+
+    print(f"Printing the list of all nodes: {list_of_nodes}")
+    # write list of nodes in csv file
+    with open(output_filename, "w", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(["name", "PK"])
+        for node in list_of_nodes:
+            writer.writerow([load_node(node).label, node])
+
+
+@click.command("cli")
+@click.option("--folder", type=Path)
+@click.option(
+    "--config",
+    type=Path,
+    help="Config file to use",
+    default="/work4/scd/scarf1228/config_janus.yaml",
+)
+@click.option("--calc", type=str, help="Calc to run", default="sp")
+@click.option("--output_filename", type=str, default="list_nodes.csv")
+@click.option("--codelabel", type=str, default="janus@scarf-hq")
+@click.option("--group", type=int, default=8)
+@click.option(
+    "--launch", type=str, default="submit", help="can be run_get_pk or submit"
+)
+def cli(folder, config, calc, output_filename,
codelabel, group, launch): + """Click interface.""" + try: + code = load_code(codelabel) + except NotExistent: + print(f"The code '{codelabel}' does not exist.") + sys.exit(1) + try: + group = load_group(group) + except NotExistent: + print(f"The group '{group}' does not exist.") + + run_hts(folder, config, calc, output_filename, code, group, launch) + + +if __name__ == "__main__": + cli() # pylint: disable=no-value-for-parameter diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py new file mode 100644 index 00000000..015a3abb --- /dev/null +++ b/aiida_mlip/workflows/training.py @@ -0,0 +1,65 @@ +from aiida_workgraph import Workgraph, task + +from aiida.engine import submit +from aiida.orm import load_node +from aiida.plugins import CalculationFactory, WorkflowFactory + +from aiida_mlip.helpers.help_load import load_structure + + +# define DFT task +@task.calcfunction() +def submit_DFT(child, dft_inputs, group): + print(child.name) + dft_inputs['metadata']['label']=f"{child.name}" + optcalculation = WorkflowFactory("quantumespresso.pw.relax") + struc = load_structure(child) + dft_inputs['struct']=struc + result = submit(optcalculation, **inputs) + group.add_nodes(load_node(result.pk)) + return group + +#syntax of this wrong +@task.calcfunction() +def create_input(group): + with open("input_file") as input_file: + for node in group: + #get the output structure + structure = node.outputs.structure + #convert it to extxyz + structure.to_ase() + # add to file + input_file.writelines(structure) + return input_file + + +# define traning task +@task.calcfunction() +def training(input_file, train_inputs): + training = CalculationFactory("mlip.train") + #check name of input file in training + train_inputs['xyz_input'] = input_file + future = submit(training, **train_inputs) + return future + + + +wg = WorkGraph("training_workflow") + +for child in folder.glob('**/*'): + if child.name.endswith("cif"): + submitdft_task = wg.tasks.new(submit_DFT, name="submission") + +# link the output of the `add` task to one of the `x` input of the `multiply` task. 
+create_file_task = wg.tasks.new(create_input, name="createinput", group = submitdft_task.outputs["result"]) + +train_task = wg.tasks.new(training, name="training", input_file=create_file_task.outputs['input_file']) + +# export the workgraph to html file so that it can be visualized in a browser +wg.to_html() +# comment out the following line to visualize the workgraph in jupyter-notebook +# wg + +# Set the maximum number of running jobs inside the WorkGraph +wg.max_number_jobs = 10 +wg.submit(wait=True) diff --git a/pyproject.toml b/pyproject.toml index 5fe3ceec..a426e7cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,9 @@ python = "^3.9" aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" -janus-core = "^v0.6.0b0" +janus-core = "^v0.6.2" +aiida-workgraph = "^0.3.7" +#aiida-quantumespresso = "^4.6.0" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From 6df534da3c536f758f8000c6801c37140d4cf2b0 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 19 Jul 2024 09:47:05 +0100 Subject: [PATCH 2/9] minor change --- aiida_mlip/workflows/training.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index 015a3abb..508685bf 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -12,12 +12,12 @@ def submit_DFT(child, dft_inputs, group): print(child.name) dft_inputs['metadata']['label']=f"{child.name}" - optcalculation = WorkflowFactory("quantumespresso.pw.relax") - struc = load_structure(child) - dft_inputs['struct']=struc - result = submit(optcalculation, **inputs) - group.add_nodes(load_node(result.pk)) - return group + optcalculation = WorkflowFactory("quantumespresso.pw.relax") + struc = load_structure(child) + dft_inputs['struct']=struc + result = submit(optcalculation, **dft_inputs) + group.add_nodes(load_node(result.pk)) + return group #syntax of this wrong @task.calcfunction() From 2c861734bbcb423c7a06e2a017d88b35c25cbedc Mon Sep 17 00:00:00 2001 From: federica Date: Mon, 22 Jul 2024 14:16:37 +0100 Subject: [PATCH 3/9] pyproject changes? 
--- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a426e7cd..8705cbfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,10 @@ python = "^3.9" aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" -janus-core = "^v0.6.2" +#janus-core = "^v0.6.2" aiida-workgraph = "^0.3.7" -#aiida-quantumespresso = "^4.6.0" +janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } +aiida-quantumespresso = "^v4.6.0" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From e917a4f2f8141cbbae0f63123ff862160294a270 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 26 Jul 2024 10:25:00 +0100 Subject: [PATCH 4/9] workgraph mostly done but pre-commits fail --- aiida_mlip/workflows/training.py | 303 ++++++++++++++++++++++++++----- pyproject.toml | 3 +- 2 files changed, 259 insertions(+), 47 deletions(-) diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index 508685bf..e1ea65e4 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -1,65 +1,276 @@ -from aiida_workgraph import Workgraph, task +""" Workgraph to run DFT calculations and use the outputs fpr training a MLIP model.""" -from aiida.engine import submit -from aiida.orm import load_node +from pathlib import Path + +from aiida_workgraph import WorkGraph, task +from sklearn.model_selection import train_test_split + +from aiida.orm import Dict, SinglefileData, load_code from aiida.plugins import CalculationFactory, WorkflowFactory +from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure +PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") + + +@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) +def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: + """ + Run a quantumespresso calculation using PwRelaxWorkChain. + + Parameters + ---------- + folder : Path + Path to the folder containing input structure files. + dft_inputs : dict + Dictionary of inputs for the DFT calculations. + + Returns + ------- + WorkGraph + The work graph containing the PW relaxation tasks. + """ + wg = WorkGraph() + for child in folder.glob("**/*xyz"): + structure = load_structure(child) + dft_inputs["base"]["structure"] = structure + dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem + pw_task = wg.add_task( + PwRelaxWorkChain, name=f"pw_relax{child.stem}", **dft_inputs + ) + pw_task.set_context({"result": f"pw_relax_{child}"}) + return wg + -# define DFT task @task.calcfunction() -def submit_DFT(child, dft_inputs, group): - print(child.name) - dft_inputs['metadata']['label']=f"{child.name}" - optcalculation = WorkflowFactory("quantumespresso.pw.relax") - struc = load_structure(child) - dft_inputs['struct']=struc - result = submit(optcalculation, **dft_inputs) - group.add_nodes(load_node(result.pk)) - return group - -#syntax of this wrong +def create_input(**inputs: dict) -> SinglefileData: + """ + Create input files from given structures. + + Parameters + ---------- + **inputs : dict + Dictionary where keys are names and values are structure data. + + Returns + ------- + SinglefileData + A SinglefileData node containing the generated input data. 
+ """ + input_data = [] + for name, structure in inputs.items(): + ase_structure = structure.to_ase() + extxyz_str = ase_structure.write(format="extxyz") + input_data.append(extxyz_str) + temp_file_path = "tmp.extxyz" + with open(temp_file_path, "w") as temp_file: + temp_file.write("\n".join(input_data)) + + file_data = SinglefileData(file=temp_file_path) + + return file_data + + @task.calcfunction() -def create_input(group): - with open("input_file") as input_file: - for node in group: - #get the output structure - structure = node.outputs.structure - #convert it to extxyz - structure.to_ase() - # add to file - input_file.writelines(structure) - return input_file - - -# define traning task +def split_xyz_file(xyz_file: SinglefileData) -> dict: + """ + Split an XYZ file into training, testing, and validation datasets. + + Parameters + ---------- + xyz_file : SinglefileData + A SinglefileData node containing the XYZ file. + + Returns + ------- + dict + A dictionary with keys 'train', 'test', and 'validation', each containing + SinglefileData nodes for the respective datasets. + """ + with xyz_file.open() as file: + lines = file.readlines() + + data = [line.strip() for line in lines if line.strip()] + + train_data, test_validation_data = train_test_split( + data, test_size=0.4, random_state=42 + ) + test_data, validation_data = train_test_split( + test_validation_data, test_size=0.5, random_state=42 + ) + + train_path = "train.extxyz" + test_path = "test.extxyz" + validation_path = "validation.extxyz" + + with open(train_path, "w") as f: + f.write("\n".join(train_data)) + with open(test_path, "w") as f: + f.write("\n".join(test_data)) + with open(validation_path, "w") as f: + f.write("\n".join(validation_data)) + + return { + "train": SinglefileData(file=train_path), + "test": SinglefileData(file=test_path), + "validation": SinglefileData(file=validation_path), + } + + @task.calcfunction() -def training(input_file, train_inputs): - training = CalculationFactory("mlip.train") - #check name of input file in training - train_inputs['xyz_input'] = input_file - future = submit(training, **train_inputs) - return future +def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: + """ + Update the JanusConfigfile with new paths for train, test, and validation datasets. + Parameters + ---------- + janusconfigfile : JanusConfigfile + The original JanusConfigfile. + Returns + ------- + JanusConfigfile + A new JanusConfigfile with updated paths. + """ + janus_dict = janusconfigfile.as_dictionary + config_parse = janusconfigfile.get_content() -wg = WorkGraph("training_workflow") + content = config_parse.replace(janus_dict["train_file"], "train.extxyz") + content = content.replace(janus_dict["test_file"], "test.extxyz") + content = content.replace(janus_dict["train_file"], "validation.extxyz") -for child in folder.glob('**/*'): - if child.name.endswith("cif"): - submitdft_task = wg.tasks.new(submit_DFT, name="submission") + new_config_path = "./config.yml" -# link the output of the `add` task to one of the `x` input of the `multiply` task. 
-create_file_task = wg.tasks.new(create_input, name="createinput", group = submitdft_task.outputs["result"]) + with open(new_config_path, "w") as file: + file.write(content) -train_task = wg.tasks.new(training, name="training", input_file=create_file_task.outputs['input_file']) + return JanusConfigfile(file=new_config_path) -# export the workgraph to html file so that it can be visualized in a browser -wg.to_html() -# comment out the following line to visualize the workgraph in jupyter-notebook -# wg -# Set the maximum number of running jobs inside the WorkGraph +wg = WorkGraph("trainingworkflow") +folder_path = Path("/home/federica/prova_training_wg") +code = load_code("qe-7.1@scarf1") +inputs = { + "base": { + "settings": Dict({"GAMMA_ONLY": True}), + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "vc-relax", + "nstep": 1200, + "etot_conv_thr": 1e-05, + "forc_conv_thr": 1e-04, + }, + "SYSTEM": { + "ecutwfc": 500, + "input_dft": "PBE", + "nspin": 1, + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "electron_maxstep": 1000, + "scf_must_converge": False, + "conv_thr": 1e-08, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + "IONS": { + "ion_dynamics": "bfgs", + }, + "CELL": { + "cell_dynamics": "bfgs", + "cell_dofree": "ibrav", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 4, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 48 * 60 * 60, + }, + }, + }, + }, + "base_final_scf": { + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "scf", + "tprnfor": True, + }, + "SYSTEM": { + "ecutwfc": 70, + "ecutrho": 650, + "input_dft": "PBE", + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "conv_thr": 1e-10, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 1, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 3 * 60 * 60, + }, + }, + }, + }, +} + +pw_task = wg.add_task( + run_pw_calc, name="pw_relax_results", folder=folder_path, dft_inputs=inputs +) + +print("CHECKPOINT1") +create_file_task = wg.add_task(create_input, name="create_input") +wg.add_link(pw_task.outputs[0], create_file_task.inputs[0]) + +print("CHECKPOINT2") +split_files_task = wg.add_task( + split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result +) +print("CHECKPOINT3") +janusconfigfile_path = "/home/federica/prova_training_wg/mlip_train.yml" +janusconfigfile = JanusConfigfile(file=janusconfigfile_path) +update_config_task = wg.add_task( + update_janusconfigfile, + name="update_janusconfigfile", + janusconfigfile=janusconfigfile, +) + +wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"]) +print("CHECKPOINT4") +training_calc = CalculationFactory("mlip.train") +train_inputs = {} +train_inputs["config_file"] = update_config_task.outputs.result +train_task = wg.add_task( + training_calc, name="training", mlip_config=update_config_task.outputs.result +) + +wg.to_html() +print("CHECKPOINT5") wg.max_number_jobs = 10 wg.submit(wait=True) diff --git a/pyproject.toml b/pyproject.toml index 8705cbfe..329d678e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,9 +31,10 @@ aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" #janus-core = "^v0.6.2" -aiida-workgraph = 
"^0.3.7" +aiida-workgraph = {extras = ["widget"], version = "^0.3.14"} janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } aiida-quantumespresso = "^v4.6.0" +scikit-learn = "^1.5.1" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From d128b0160f98ee99584e3cfb3f3a9471d42b6e2b Mon Sep 17 00:00:00 2001 From: federicazanca Date: Tue, 30 Jul 2024 13:53:26 +0100 Subject: [PATCH 5/9] change paths --- .../workflows/html/trainingworkflow.html | 258 ++++++++++++++++++ aiida_mlip/workflows/training.py | 10 +- 2 files changed, 263 insertions(+), 5 deletions(-) create mode 100644 aiida_mlip/workflows/html/trainingworkflow.html diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html new file mode 100644 index 00000000..a15f40e4 --- /dev/null +++ b/aiida_mlip/workflows/html/trainingworkflow.html @@ -0,0 +1,258 @@ + + + + + + + Rete.js with React in Vanilla JS + + + + + + + + + + + + + + + + + + + + +
+ + + diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index e1ea65e4..e409864a 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -37,9 +37,9 @@ def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: dft_inputs["base"]["structure"] = structure dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem pw_task = wg.add_task( - PwRelaxWorkChain, name=f"pw_relax{child.stem}", **dft_inputs + PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs ) - pw_task.set_context({"result": f"pw_relax_{child}"}) + pw_task.set_context({"result": f"pw_relax_{child.stem}"}) return wg @@ -149,8 +149,8 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: wg = WorkGraph("trainingworkflow") -folder_path = Path("/home/federica/prova_training_wg") -code = load_code("qe-7.1@scarf1") +folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") +code = load_code("qe-7.1@scarf") inputs = { "base": { "settings": Dict({"GAMMA_ONLY": True}), @@ -253,7 +253,7 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result ) print("CHECKPOINT3") -janusconfigfile_path = "/home/federica/prova_training_wg/mlip_train.yml" +janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml" janusconfigfile = JanusConfigfile(file=janusconfigfile_path) update_config_task = wg.add_task( update_janusconfigfile, From 95fbb81680c075dbe11771a83e1f66d5cfae84a7 Mon Sep 17 00:00:00 2001 From: federicazanca Date: Tue, 30 Jul 2024 15:06:21 +0100 Subject: [PATCH 6/9] ok but entry point not working --- .../workflows/html/trainingworkflow.html | 2 +- aiida_mlip/workflows/training.py | 24 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html index a15f40e4..0dad339b 100644 --- a/aiida_mlip/workflows/html/trainingworkflow.html +++ b/aiida_mlip/workflows/html/trainingworkflow.html @@ -59,7 +59,7 @@ const { RenderUtils } = ReteRenderUtils; const styled = window.styled; - const workgraphData = {"name": "trainingworkflow", "uuid": "ad06a2d4-4e59-11ef-8066-3cecef4478be", "state": "CREATED", "nodes": {"pw_relax_results": {"label": "pw_relax_results", "inputs": [{"name": "folder", "identifier": "Any", "uuid": "ad0ea862-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0ea236-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "dft_inputs", "identifier": "Any", "uuid": "ad0eab82-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0ea236-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}], "outputs": [{"name": "result"}], "position": [30, 30]}, "create_input": {"label": "create_input", "inputs": [{"name": "metadata"}], "outputs": [{"name": "result"}], "position": [60, 60]}, "split_xyz": {"label": "split_xyz", "inputs": [{"name": "xyz_file", "identifier": "Any", "uuid": "ad0f7c1a-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0f6f86-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "create_input", 
"from_socket": "result", "from_socket_uuid": "ad0f1f7c-4e59-11ef-8066-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "xyz_file"}], "outputs": [{"name": "result"}], "position": [90, 90]}, "update_janusconfigfile": {"label": "update_janusconfigfile", "inputs": [{"name": "janusconfigfile", "identifier": "Any", "uuid": "ad11cbbe-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad11be80-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "_wait"}], "outputs": [{"name": "result"}], "position": [120, 120]}, "training": {"label": "training", "inputs": [{"name": "mlip_config", "identifier": "Any", "uuid": "ad143656-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad13f862-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "update_janusconfigfile", "from_socket": "result", "from_socket_uuid": "ad11cd9e-4e59-11ef-8066-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "mlip_config"}], "outputs": [], "position": [150, 150]}}, "links": [{"from_socket": "result", "from_node": "pw_relax_results", "from_socket_uuid": "ad0eae66-4e59-11ef-8066-3cecef4478be", "to_socket": "metadata", "to_node": "create_input", "state": false}, {"from_socket": "result", "from_node": "create_input", "from_socket_uuid": "ad0f1f7c-4e59-11ef-8066-3cecef4478be", "to_socket": "xyz_file", "to_node": "split_xyz", "state": false}, {"from_socket": "result", "from_node": "split_xyz", "from_socket_uuid": "ad0f7eea-4e59-11ef-8066-3cecef4478be", "to_socket": "_wait", "to_node": "update_janusconfigfile", "state": false}, {"from_socket": "result", "from_node": "update_janusconfigfile", "from_socket_uuid": "ad11cd9e-4e59-11ef-8066-3cecef4478be", "to_socket": "mlip_config", "to_node": "training", "state": false}]} + const workgraphData = {"name": "trainingworkflow", "uuid": "91809e08-4e7b-11ef-9a8b-3cecef4478be", "state": "CREATED", "nodes": {"pw_relax": {"label": "pw_relax", "inputs": [{"name": "folder", "identifier": "Any", "uuid": "91885fd0-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "91885bde-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "dft_inputs", "identifier": "Any", "uuid": "9188614c-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "91885bde-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}], "outputs": [{"name": "result"}], "position": [30, 30]}, "create_input": {"label": "create_input", "inputs": [{"name": "inputs"}], "outputs": [{"name": "result"}], "position": [60, 60]}, "split_xyz": {"label": "split_xyz", "inputs": [{"name": "xyz_file", "identifier": "Any", "uuid": "9188e19e-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "9188dab4-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "create_input", "from_socket": "result", "from_socket_uuid": 
"9188a922-4e7b-11ef-9a8b-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "xyz_file"}], "outputs": [{"name": "result"}], "position": [90, 90]}, "update_janusconfigfile": {"label": "update_janusconfigfile", "inputs": [{"name": "janusconfigfile", "identifier": "Any", "uuid": "918a575e-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "918a4ec6-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "_wait"}], "outputs": [{"name": "result"}], "position": [120, 120]}, "training": {"label": "training", "inputs": [{"name": "mlip_config", "identifier": "Any", "uuid": "918b5794-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "918b2d50-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "update_janusconfigfile", "from_socket": "result", "from_socket_uuid": "918a58b2-4e7b-11ef-9a8b-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "mlip_config"}], "outputs": [], "position": [150, 150]}}, "links": [{"from_socket": "result", "from_node": "pw_relax", "from_socket_uuid": "91886322-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "inputs", "to_node": "create_input", "state": false}, {"from_socket": "result", "from_node": "create_input", "from_socket_uuid": "9188a922-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "xyz_file", "to_node": "split_xyz", "state": false}, {"from_socket": "result", "from_node": "split_xyz", "from_socket_uuid": "9188e2b6-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "_wait", "to_node": "update_janusconfigfile", "state": false}, {"from_socket": "result", "from_node": "update_janusconfigfile", "from_socket_uuid": "918a58b2-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "mlip_config", "to_node": "training", "state": false}]} // Define Schemes to use in vanilla JS const Schemes = { diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index e409864a..d5f37fde 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -6,13 +6,13 @@ from sklearn.model_selection import train_test_split from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory +from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure +from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain -PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") - +#PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") @task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: @@ -31,18 +31,21 @@ def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: WorkGraph The work graph containing the PW relaxation tasks. 
""" + + print("CHECKPOINT 6") wg = WorkGraph() for child in folder.glob("**/*xyz"): structure = load_structure(child) dft_inputs["base"]["structure"] = structure dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem - pw_task = wg.add_task( - PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs - ) - pw_task.set_context({"result": f"pw_relax_{child.stem}"}) + pw_task = wg.add_task(PwRelaxWorkChain, name=f"pw_relax_{child.stem}") + pw_task.set(dft_inputs) + pw_task.set_context({"final_structure": f"pw.{child.stem}"}) + print("CHECKPOINT 7") return wg + @task.calcfunction() def create_input(**inputs: dict) -> SinglefileData: """ @@ -58,6 +61,7 @@ def create_input(**inputs: dict) -> SinglefileData: SinglefileData A SinglefileData node containing the generated input data. """ + print("CHECKPOINT 8") input_data = [] for name, structure in inputs.items(): ase_structure = structure.to_ase() @@ -88,6 +92,7 @@ def split_xyz_file(xyz_file: SinglefileData) -> dict: A dictionary with keys 'train', 'test', and 'validation', each containing SinglefileData nodes for the respective datasets. """ + print("CHECKPOINT 9") with xyz_file.open() as file: lines = file.readlines() @@ -133,6 +138,7 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: JanusConfigfile A new JanusConfigfile with updated paths. """ + print("CHECKPOINT 10") janus_dict = janusconfigfile.as_dictionary config_parse = janusconfigfile.get_content() @@ -241,12 +247,12 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: } pw_task = wg.add_task( - run_pw_calc, name="pw_relax_results", folder=folder_path, dft_inputs=inputs + run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs ) print("CHECKPOINT1") create_file_task = wg.add_task(create_input, name="create_input") -wg.add_link(pw_task.outputs[0], create_file_task.inputs[0]) +wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"]) print("CHECKPOINT2") split_files_task = wg.add_task( From 34c535b92544db521dd344c0f704ea3b90bafc17 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 31 Jul 2024 14:44:52 +0100 Subject: [PATCH 7/9] fixed workgraph and submission --- .../workflows/html/trainingworkflow.html | 258 ---------------- aiida_mlip/workflows/hts.py | 114 ------- aiida_mlip/workflows/training.py | 282 ------------------ aiida_mlip/workflows/training_workgraph.py | 215 +++++++++++++ examples/workflows/submit_train_wg.py | 103 +++++++ pyproject.toml | 6 +- 6 files changed, 322 insertions(+), 656 deletions(-) delete mode 100644 aiida_mlip/workflows/html/trainingworkflow.html delete mode 100644 aiida_mlip/workflows/hts.py delete mode 100644 aiida_mlip/workflows/training.py create mode 100644 aiida_mlip/workflows/training_workgraph.py create mode 100644 examples/workflows/submit_train_wg.py diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html deleted file mode 100644 index 0dad339b..00000000 --- a/aiida_mlip/workflows/html/trainingworkflow.html +++ /dev/null @@ -1,258 +0,0 @@ - - - - - - - Rete.js with React in Vanilla JS - - - - - - - - - - - - - - - - - - - - -
-
-
-
diff --git a/aiida_mlip/workflows/hts.py b/aiida_mlip/workflows/hts.py
deleted file mode 100644
index 13dca328..00000000
--- a/aiida_mlip/workflows/hts.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Example code for submitting single point calculations."""
-
-import csv
-from pathlib import Path
-import sys
-import time
-
-import click
-
-from aiida.common import NotExistent
-from aiida.engine import run_get_pk, submit
-from aiida.orm import load_code, load_group, load_node
-from aiida.plugins import CalculationFactory
-
-from aiida_mlip.data.config import JanusConfigfile
-from aiida_mlip.data.model import ModelData
-from aiida_mlip.helpers.help_load import load_structure
-
-
-def run_hts(folder, config, calc, output_filename, code, group, launch):
-    # Add the required inputs for aiida
-    metadata = {"options": {"resources": {"num_machines": 1}}}
-
-    # All other parameters are taken from the config file
-    # We want to pass it as an AiiDA data type for the provenance
-    conf = JanusConfigfile(config)
-    # Define calculation to run
-    Calculation = CalculationFactory(f"mlip.{calc}")
-    model = ModelData.download(
-        url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model",
-        cache_dir="models",
-        architecture="mace_mp",
-        filename="small.model",
-    )
-    list_of_nodes = []
-    p = Path(folder)
-    for child in p.glob("**/*"):
-        if child.name.endswith("cif"):
-            print(child.name)
-            metadata["label"] = f"{child.name}"
-            # This structure will overwrite the one in the config file if present
-            structure = load_structure(child.absolute())
-            # Run calculation
-            if launch == "run_get_pk":
-                result, pk = run_get_pk(
-                    Calculation,
-                    code=code,
-                    struct=structure,
-                    metadata=metadata,
-                    config=conf,
-                    model=model,
-                )
-                list_of_nodes.append(pk)
-
-                group.add_nodes(load_node(pk))
-                time.sleep(1)
-                print(f"Printing results from calculation: {result}")
-
-            if launch == "submit":
-                result = submit(
-                    Calculation,
-                    code=code,
-                    struct=structure,
-                    metadata=metadata,
-                    config=conf,
-                    model=model,
-                )
-                list_of_nodes.append(result.pk)
-
-                group.add_nodes(load_node(result.pk))
-
-                print(f"Printing results from calculation: {result}")
-
-    print(f"Printing the list of all nodes: {list_of_nodes}")
-    # write list of nodes in csv file
-    with open(output_filename, "w", newline="") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["name", "PK"])
-        for node in list_of_nodes:
-            writer.writerow([load_node(node).label, node])
-
-
-@click.command("cli")
-@click.option("--folder", type=Path)
-@click.option(
-    "--config",
-    type=Path,
-    help="Config file to use",
-    default="/work4/scd/scarf1228/config_janus.yaml",
-)
-@click.option("--calc", type=str, help="Calc to run", default="sp")
-@click.option("--output_filename", type=str, default="list_nodes.csv")
-@click.option("--codelabel", type=str, default="janus@scarf-hq")
-@click.option("--group", type=int, default=8)
-@click.option(
-    "--launch", type=str, default="submit", help="can be run_get_pk or submit"
-)
-def cli(folder, config, calc, output_filename, codelabel, group, launch):
-    """Click interface."""
-    try:
-        code = load_code(codelabel)
-    except NotExistent:
-        print(f"The code '{codelabel}' does not exist.")
-        sys.exit(1)
-    try:
-        group = load_group(group)
-    except NotExistent:
-        print(f"The group '{group}' does not exist.")
-
-    run_hts(folder, config, calc, output_filename, code, group, launch)
-
-
-if __name__ == "__main__":
-    cli()  # pylint: disable=no-value-for-parameter
diff --git a/aiida_mlip/workflows/training.py
b/aiida_mlip/workflows/training.py deleted file mode 100644 index d5f37fde..00000000 --- a/aiida_mlip/workflows/training.py +++ /dev/null @@ -1,282 +0,0 @@ -""" Workgraph to run DFT calculations and use the outputs fpr training a MLIP model.""" - -from pathlib import Path - -from aiida_workgraph import WorkGraph, task -from sklearn.model_selection import train_test_split - -from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point - -from aiida_mlip.data.config import JanusConfigfile -from aiida_mlip.helpers.help_load import load_structure -from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain - -#PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") - -@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) -def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: - """ - Run a quantumespresso calculation using PwRelaxWorkChain. - - Parameters - ---------- - folder : Path - Path to the folder containing input structure files. - dft_inputs : dict - Dictionary of inputs for the DFT calculations. - - Returns - ------- - WorkGraph - The work graph containing the PW relaxation tasks. - """ - - print("CHECKPOINT 6") - wg = WorkGraph() - for child in folder.glob("**/*xyz"): - structure = load_structure(child) - dft_inputs["base"]["structure"] = structure - dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem - pw_task = wg.add_task(PwRelaxWorkChain, name=f"pw_relax_{child.stem}") - pw_task.set(dft_inputs) - pw_task.set_context({"final_structure": f"pw.{child.stem}"}) - print("CHECKPOINT 7") - return wg - - - -@task.calcfunction() -def create_input(**inputs: dict) -> SinglefileData: - """ - Create input files from given structures. - - Parameters - ---------- - **inputs : dict - Dictionary where keys are names and values are structure data. - - Returns - ------- - SinglefileData - A SinglefileData node containing the generated input data. - """ - print("CHECKPOINT 8") - input_data = [] - for name, structure in inputs.items(): - ase_structure = structure.to_ase() - extxyz_str = ase_structure.write(format="extxyz") - input_data.append(extxyz_str) - temp_file_path = "tmp.extxyz" - with open(temp_file_path, "w") as temp_file: - temp_file.write("\n".join(input_data)) - - file_data = SinglefileData(file=temp_file_path) - - return file_data - - -@task.calcfunction() -def split_xyz_file(xyz_file: SinglefileData) -> dict: - """ - Split an XYZ file into training, testing, and validation datasets. - - Parameters - ---------- - xyz_file : SinglefileData - A SinglefileData node containing the XYZ file. - - Returns - ------- - dict - A dictionary with keys 'train', 'test', and 'validation', each containing - SinglefileData nodes for the respective datasets. 
- """ - print("CHECKPOINT 9") - with xyz_file.open() as file: - lines = file.readlines() - - data = [line.strip() for line in lines if line.strip()] - - train_data, test_validation_data = train_test_split( - data, test_size=0.4, random_state=42 - ) - test_data, validation_data = train_test_split( - test_validation_data, test_size=0.5, random_state=42 - ) - - train_path = "train.extxyz" - test_path = "test.extxyz" - validation_path = "validation.extxyz" - - with open(train_path, "w") as f: - f.write("\n".join(train_data)) - with open(test_path, "w") as f: - f.write("\n".join(test_data)) - with open(validation_path, "w") as f: - f.write("\n".join(validation_data)) - - return { - "train": SinglefileData(file=train_path), - "test": SinglefileData(file=test_path), - "validation": SinglefileData(file=validation_path), - } - - -@task.calcfunction() -def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: - """ - Update the JanusConfigfile with new paths for train, test, and validation datasets. - - Parameters - ---------- - janusconfigfile : JanusConfigfile - The original JanusConfigfile. - - Returns - ------- - JanusConfigfile - A new JanusConfigfile with updated paths. - """ - print("CHECKPOINT 10") - janus_dict = janusconfigfile.as_dictionary - config_parse = janusconfigfile.get_content() - - content = config_parse.replace(janus_dict["train_file"], "train.extxyz") - content = content.replace(janus_dict["test_file"], "test.extxyz") - content = content.replace(janus_dict["train_file"], "validation.extxyz") - - new_config_path = "./config.yml" - - with open(new_config_path, "w") as file: - file.write(content) - - return JanusConfigfile(file=new_config_path) - - -wg = WorkGraph("trainingworkflow") -folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") -code = load_code("qe-7.1@scarf") -inputs = { - "base": { - "settings": Dict({"GAMMA_ONLY": True}), - "pw": { - "parameters": Dict( - { - "CONTROL": { - "calculation": "vc-relax", - "nstep": 1200, - "etot_conv_thr": 1e-05, - "forc_conv_thr": 1e-04, - }, - "SYSTEM": { - "ecutwfc": 500, - "input_dft": "PBE", - "nspin": 1, - "occupations": "smearing", - "degauss": 0.001, - "smearing": "m-p", - }, - "ELECTRONS": { - "electron_maxstep": 1000, - "scf_must_converge": False, - "conv_thr": 1e-08, - "mixing_beta": 0.25, - "diago_david_ndim": 4, - "startingpot": "atomic", - "startingwfc": "atomic+random", - }, - "IONS": { - "ion_dynamics": "bfgs", - }, - "CELL": { - "cell_dynamics": "bfgs", - "cell_dofree": "ibrav", - }, - } - ), - "code": code, - "metadata": { - "options": { - "resources": { - "num_machines": 4, - "num_mpiprocs_per_machine": 32, - }, - "max_wallclock_seconds": 48 * 60 * 60, - }, - }, - }, - }, - "base_final_scf": { - "pw": { - "parameters": Dict( - { - "CONTROL": { - "calculation": "scf", - "tprnfor": True, - }, - "SYSTEM": { - "ecutwfc": 70, - "ecutrho": 650, - "input_dft": "PBE", - "occupations": "smearing", - "degauss": 0.001, - "smearing": "m-p", - }, - "ELECTRONS": { - "conv_thr": 1e-10, - "mixing_beta": 0.25, - "diago_david_ndim": 4, - "startingpot": "atomic", - "startingwfc": "atomic+random", - }, - } - ), - "code": code, - "metadata": { - "options": { - "resources": { - "num_machines": 1, - "num_mpiprocs_per_machine": 32, - }, - "max_wallclock_seconds": 3 * 60 * 60, - }, - }, - }, - }, -} - -pw_task = wg.add_task( - run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs -) - -print("CHECKPOINT1") -create_file_task = wg.add_task(create_input, name="create_input") 
-wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"])
-
-print("CHECKPOINT2")
-split_files_task = wg.add_task(
-    split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result
-)
-print("CHECKPOINT3")
-janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml"
-janusconfigfile = JanusConfigfile(file=janusconfigfile_path)
-update_config_task = wg.add_task(
-    update_janusconfigfile,
-    name="update_janusconfigfile",
-    janusconfigfile=janusconfigfile,
-)
-
-wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"])
-print("CHECKPOINT4")
-training_calc = CalculationFactory("mlip.train")
-train_inputs = {}
-train_inputs["config_file"] = update_config_task.outputs.result
-train_task = wg.add_task(
-    training_calc, name="training", mlip_config=update_config_task.outputs.result
-)
-
-wg.to_html()
-print("CHECKPOINT5")
-wg.max_number_jobs = 10
-wg.submit(wait=True)
diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py
new file mode 100644
index 00000000..19b7dc08
--- /dev/null
+++ b/aiida_mlip/workflows/training_workgraph.py
@@ -0,0 +1,215 @@
+"""Workgraph to run DFT calculations and use the outputs for training an MLIP model."""
+
+from pathlib import Path
+
+from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain
+from aiida_workgraph.workgraph import WorkGraph, task
+from ase.io import read
+from sklearn.model_selection import train_test_split
+
+from aiida.orm import Dict, SinglefileData, load_code
+from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point
+
+from aiida_mlip.data.config import JanusConfigfile
+from aiida_mlip.helpers.help_load import load_structure
+
+PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax")
+
+
+@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}])
+def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph:
+    """
+    Run a quantumespresso calculation using PwRelaxWorkChain.
+
+    Parameters
+    ----------
+    folder : Path
+        Path to the folder containing input structure files.
+    dft_inputs : dict
+        Dictionary of inputs for the DFT calculations.
+
+    Returns
+    -------
+    WorkGraph
+        The work graph containing the PW relaxation tasks.
+    """
+    wg = WorkGraph()
+
+    for child in folder.glob("**/*"):
+        try:
+            read(child.as_posix())
+        except Exception:  # pylint: disable=broad-except
+            continue
+        structure = load_structure(child)
+        dft_inputs["base"]["structure"] = structure
+        dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem
+        pw_task = wg.add_task(
+            PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs
+        )
+        pw_task.set_context({"output_structure": f"pw.{child.stem}"})
+    return wg
+
+
+@task.calcfunction()
+def create_input(**inputs: dict) -> SinglefileData:
+    """
+    Create input files from given structures.
+
+    Parameters
+    ----------
+    **inputs : dict
+        Dictionary where keys are names and values are structure data.
+
+    Returns
+    -------
+    SinglefileData
+        A SinglefileData node containing the generated input data.
+ """ + + input_data = [] + for name, structure in inputs.items(): + ase_structure = structure.to_ase() + extxyz_str = ase_structure.write(format="extxyz") + input_data.append(extxyz_str) + temp_file_path = "tmp.extxyz" + with open(temp_file_path, "w") as temp_file: + temp_file.write("\n".join(input_data)) + + file_data = SinglefileData(file=temp_file_path) + + return file_data + + +@task.calcfunction() +def split_xyz_file(xyz_file: SinglefileData) -> dict: + """ + Split an XYZ file into training, testing, and validation datasets. + + Parameters + ---------- + xyz_file : SinglefileData + A SinglefileData node containing the XYZ file. + + Returns + ------- + dict + A dictionary with keys 'train', 'test', and 'validation', each containing + SinglefileData nodes for the respective datasets. + """ + + with xyz_file.open() as file: + lines = file.readlines() + + data = [line.strip() for line in lines if line.strip()] + + train_data, test_validation_data = train_test_split( + data, test_size=0.4, random_state=42 + ) + test_data, validation_data = train_test_split( + test_validation_data, test_size=0.5, random_state=42 + ) + + train_path = "train.extxyz" + test_path = "test.extxyz" + validation_path = "validation.extxyz" + + with open(train_path, "w") as f: + f.write("\n".join(train_data)) + with open(test_path, "w") as f: + f.write("\n".join(test_data)) + with open(validation_path, "w") as f: + f.write("\n".join(validation_data)) + + return { + "train": SinglefileData(file=train_path), + "test": SinglefileData(file=test_path), + "validation": SinglefileData(file=validation_path), + } + + +@task.calcfunction() +def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: + """ + Update the JanusConfigfile with new paths for train, test, and validation datasets. + + Parameters + ---------- + janusconfigfile : JanusConfigfile + The original JanusConfigfile. + + Returns + ------- + JanusConfigfile + A new JanusConfigfile with updated paths. + """ + print("CHECKPOINT 10") + janus_dict = janusconfigfile.as_dictionary + config_parse = janusconfigfile.get_content() + + content = config_parse.replace(janus_dict["train_file"], "train.extxyz") + content = content.replace(janus_dict["test_file"], "test.extxyz") + content = content.replace(janus_dict["train_file"], "validation.extxyz") + + new_config_path = "./config.yml" + + with open(new_config_path, "w") as file: + file.write(content) + + return JanusConfigfile(file=new_config_path) + + +def TrainWorkGraph( + folder_path: Path, inputs: dict, janusconfigfile: JanusConfigfile +) -> WorkGraph: + """ + Create a workflow for optimising using QE and using the results for training mlips. + + Parameters + ---------- + folder_path : Path + Path to the folder containing input structure files. + inputs : dict + Dictionary of inputs for the calculations. + janusconfigfile : JanusConfigfile + File with inputs for janus calculations. + + Returns + ------- + WorkGraph + The workgraph containing the training workflow. 
+ """ + wg = WorkGraph("trainingworkflow") + + pw_task = wg.add_task( + run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs + ) + + create_file_task = wg.add_task(create_input, name="create_input") + wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"]) + + split_files_task = wg.add_task( + split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result + ) + + update_config_task = wg.add_task( + update_janusconfigfile, + name="update_janusconfigfile", + janusconfigfile=janusconfigfile, + ) + + wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"]) + + training_calc = CalculationFactory("mlip.train") + train_inputs = {} + train_inputs["config_file"] = update_config_task.outputs.result + train_task = wg.add_task( + training_calc, name="training", mlip_config=update_config_task.outputs.result + ) + wg.group_outputs = [{"name": "opt_structures", "from": "pw_task.output_structures"}] + wg.group_outputs = [{"name": "final_model", "from": "train_task.outputs.model"}] + + wg.to_html() + + wg.max_number_jobs = 10 + wg.submit(wait=True) + return wg diff --git a/examples/workflows/submit_train_wg.py b/examples/workflows/submit_train_wg.py new file mode 100644 index 00000000..bc9b4fd1 --- /dev/null +++ b/examples/workflows/submit_train_wg.py @@ -0,0 +1,103 @@ +"""Example submission for hts workgraph.""" + +from pathlib import Path + +from aiida.orm import Dict, load_code + +from aiida_mlip.data.config import JanusConfigfile +from aiida_mlip.workflows.training_workgraph import TrainWorkGraph + +folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") +code = load_code("qe-7.1@scarf") +inputs = { + "base": { + "settings": Dict({"GAMMA_ONLY": True}), + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "vc-relax", + "nstep": 1200, + "etot_conv_thr": 1e-05, + "forc_conv_thr": 1e-04, + }, + "SYSTEM": { + "ecutwfc": 500, + "input_dft": "PBE", + "nspin": 1, + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "electron_maxstep": 1000, + "scf_must_converge": False, + "conv_thr": 1e-08, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + "IONS": { + "ion_dynamics": "bfgs", + }, + "CELL": { + "cell_dynamics": "bfgs", + "cell_dofree": "ibrav", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 4, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 48 * 60 * 60, + }, + }, + }, + }, + "base_final_scf": { + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "scf", + "tprnfor": True, + }, + "SYSTEM": { + "ecutwfc": 70, + "ecutrho": 650, + "input_dft": "PBE", + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "conv_thr": 1e-10, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 1, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 3 * 60 * 60, + }, + }, + }, + }, +} +janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml" +janusconfigfile = JanusConfigfile(file=janusconfigfile_path) + +TrainWorkGraph(folder_path, inputs, janusconfigfile) diff --git a/pyproject.toml b/pyproject.toml index 329d678e..657920dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,8 @@ python = "^3.9" aiida-core = "^2.6" 
ase = "^3.23.0" voluptuous = "^0.14" -#janus-core = "^v0.6.2" +janus-core = "^v0.6.3b0" aiida-workgraph = {extras = ["widget"], version = "^0.3.14"} -janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } aiida-quantumespresso = "^v4.6.0" scikit-learn = "^1.5.1" @@ -83,6 +82,9 @@ build-backend = "poetry.core.masonry.api" "mlip.md_parser" = "aiida_mlip.parsers.md_parser:MDParser" "mlip.train_parser" = "aiida_mlip.parsers.train_parser:TrainParser" +[tool.poetry.plugins."aiida.workflows"] +"mlip.training_wg" = "aiida_mlip.workflows.traning_workgraph:TrainWorkGraph" + [tool.black] line-length = 88 From 1f2389e2197db6400ec9f588044d79f499c6b5f8 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 31 Jul 2024 15:58:13 +0100 Subject: [PATCH 8/9] fix pre-commit? --- aiida_mlip/workflows/training_workgraph.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py index 19b7dc08..3eef6ac2 100644 --- a/aiida_mlip/workflows/training_workgraph.py +++ b/aiida_mlip/workflows/training_workgraph.py @@ -3,12 +3,12 @@ from pathlib import Path from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain -from aiida_workgraph.workgraph import WorkGraph, task +from aiida_workgraph import WorkGraph, task from ase.io import read from sklearn.model_selection import train_test_split -from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point +from aiida.orm import SinglefileData +from aiida.plugins import CalculationFactory, WorkflowFactory from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure @@ -67,12 +67,12 @@ def create_input(**inputs: dict) -> SinglefileData: """ input_data = [] - for name, structure in inputs.items(): + for _, structure in inputs.items(): ase_structure = structure.to_ase() extxyz_str = ase_structure.write(format="extxyz") input_data.append(extxyz_str) temp_file_path = "tmp.extxyz" - with open(temp_file_path, "w") as temp_file: + with open(temp_file_path, "w", encoding="utf8") as temp_file: temp_file.write("\n".join(input_data)) file_data = SinglefileData(file=temp_file_path) @@ -113,11 +113,11 @@ def split_xyz_file(xyz_file: SinglefileData) -> dict: test_path = "test.extxyz" validation_path = "validation.extxyz" - with open(train_path, "w") as f: + with open(train_path, "w", encoding="utf8") as f: f.write("\n".join(train_data)) - with open(test_path, "w") as f: + with open(test_path, "w", encoding="utf8") as f: f.write("\n".join(test_data)) - with open(validation_path, "w") as f: + with open(validation_path, "w", encoding="utf8") as f: f.write("\n".join(validation_data)) return { @@ -152,12 +152,13 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: new_config_path = "./config.yml" - with open(new_config_path, "w") as file: + with open(new_config_path, "w", encoding="utf8") as file: file.write(content) return JanusConfigfile(file=new_config_path) +# pylint: disable=unused-variable def TrainWorkGraph( folder_path: Path, inputs: dict, janusconfigfile: JanusConfigfile ) -> WorkGraph: @@ -202,6 +203,7 @@ def TrainWorkGraph( training_calc = CalculationFactory("mlip.train") train_inputs = {} train_inputs["config_file"] = update_config_task.outputs.result + train_task = wg.add_task( training_calc, name="training", mlip_config=update_config_task.outputs.result ) From 
From 3deb4ff024fe13c0f3b22b72a1411c8f3b08d987 Mon Sep 17 00:00:00 2001
From: Alin Marin Elena
Date: Wed, 20 Nov 2024 09:09:18 +0000
Subject: [PATCH 9/9] Apply suggestions from code review

Co-authored-by: Xing Wang
---
 aiida_mlip/workflows/training_workgraph.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py
index 3eef6ac2..f11351be 100644
--- a/aiida_mlip/workflows/training_workgraph.py
+++ b/aiida_mlip/workflows/training_workgraph.py
@@ -80,7 +80,10 @@ def create_input(**inputs: dict) -> SinglefileData:
     return file_data
 
 
-@task.calcfunction()
+@task.calcfunction(outputs = [{"name": "train"},
+                              {"name": "test"},
+                              {"name": "validation"}
+                              ])
 def split_xyz_file(xyz_file: SinglefileData) -> dict:
     """
     Split an XYZ file into training, testing, and validation datasets.
@@ -198,7 +201,7 @@ def TrainWorkGraph(
         janusconfigfile=janusconfigfile,
     )
 
-    wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"])
+    wg.add_link(split_files_task.outputs["_wait"], update_config_task.inputs["_wait"])
 
     training_calc = CalculationFactory("mlip.train")
     train_inputs = {}
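
A closing note on create_input as it stands at the end of the series: ASE's Atoms.write requires a file target as its first argument, so ase_structure.write(format="extxyz") raises a TypeError rather than returning extxyz text. Below is a minimal sketch of one way to collect the structures into a single multi-frame extxyz SinglefileData using ase.io.write instead; structures_to_extxyz is a hypothetical helper, not part of these patches, and it assumes each input node exposes to_ase() as in the code above.

    from io import StringIO

    from ase.io import write as ase_write

    from aiida.orm import SinglefileData


    def structures_to_extxyz(**structures) -> SinglefileData:
        """Gather structure nodes into one multi-frame extxyz SinglefileData."""
        buffer = StringIO()
        # ase.io.write accepts an open file object and a list of Atoms;
        # each Atoms object becomes one frame of the extxyz stream.
        ase_write(
            buffer, [node.to_ase() for node in structures.values()], format="extxyz"
        )
        buffer.seek(0)
        return SinglefileData(file=buffer, filename="input.extxyz")

Writing to an in-memory buffer also avoids leaving the tmp.extxyz intermediate file behind in the working directory.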