From 19b831a935563d0664433b7fa6ec0a339172adb2 Mon Sep 17 00:00:00 2001
From: federica
Date: Tue, 9 Jul 2024 14:03:59 +0100
Subject: [PATCH 1/9] initial draft for workgraph, pre-commit fail

---
 aiida_mlip/workflows/hts.py      | 114 +++++++++++++++++++++++++++++++
 aiida_mlip/workflows/training.py |  65 ++++++++++++++++++
 pyproject.toml                   |   4 +-
 3 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 aiida_mlip/workflows/hts.py
 create mode 100644 aiida_mlip/workflows/training.py

diff --git a/aiida_mlip/workflows/hts.py b/aiida_mlip/workflows/hts.py
new file mode 100644
index 00000000..13dca328
--- /dev/null
+++ b/aiida_mlip/workflows/hts.py
@@ -0,0 +1,114 @@
+"""Example code for submitting single point calculations."""
+
+import csv
+from pathlib import Path
+import sys
+import time
+
+import click
+
+from aiida.common import NotExistent
+from aiida.engine import run_get_pk, submit
+from aiida.orm import load_code, load_group, load_node
+from aiida.plugins import CalculationFactory
+
+from aiida_mlip.data.config import JanusConfigfile
+from aiida_mlip.data.model import ModelData
+from aiida_mlip.helpers.help_load import load_structure
+
+
+def run_hts(folder, config, calc, output_filename, code, group, launch):
+    # Add the required inputs for aiida
+    metadata = {"options": {"resources": {"num_machines": 1}}}
+
+    # All other parameters are taken from the config file
+    # We want to pass it as an AiiDA data type for the provenance
+    conf = JanusConfigfile(config)
+    # Define calculation to run
+    Calculation = CalculationFactory(f"mlip.{calc}")
+    model = ModelData.download(
+        url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model",
+        cache_dir="models",
+        architecture="mace_mp",
+        filename="small.model",
+    )
+    list_of_nodes = []
+    p = Path(folder)
+    for child in p.glob("**/*"):
+        if child.name.endswith("cif"):
+            print(child.name)
+            metadata["label"] = f"{child.name}"
+            # This structure will overwrite the one in the config file if present
+            structure = load_structure(child.absolute())
+            # Run calculation
+            if launch == "run_get_pk":
+                result, pk = run_get_pk(
+                    Calculation,
+                    code=code,
+                    struct=structure,
+                    metadata=metadata,
+                    config=conf,
+                    model=model,
+                )
+                list_of_nodes.append(pk)
+
+                group.add_nodes(load_node(pk))
+                time.sleep(1)
+                print(f"Printing results from calculation: {result}")
+
+            if launch == "submit":
+                result = submit(
+                    Calculation,
+                    code=code,
+                    struct=structure,
+                    metadata=metadata,
+                    config=conf,
+                    model=model,
+                )
+                list_of_nodes.append(result.pk)
+
+                group.add_nodes(load_node(result.pk))
+
+                print(f"Printing results from calculation: {result}")
+
+    print(f"Printing the list of all nodes: {list_of_nodes}")
+    # write list of nodes in csv file
+    with open(output_filename, "w", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(["name", "PK"])
+        for node in list_of_nodes:
+            writer.writerow([load_node(node).label, node])
+
+
+@click.command("cli")
+@click.option("--folder", type=Path)
+@click.option(
+    "--config",
+    type=Path,
+    help="Config file to use",
+    default="/work4/scd/scarf1228/config_janus.yaml",
+)
+@click.option("--calc", type=str, help="Calc to run", default="sp")
+@click.option("--output_filename", type=str, default="list_nodes.csv")
+@click.option("--codelabel", type=str, default="janus@scarf-hq")
+@click.option("--group", type=int, default=8)
+@click.option(
+    "--launch", type=str, default="submit", help="can be run_get_pk or submit"
+)
+def cli(folder, config, calc, output_filename,
codelabel, group, launch): + """Click interface.""" + try: + code = load_code(codelabel) + except NotExistent: + print(f"The code '{codelabel}' does not exist.") + sys.exit(1) + try: + group = load_group(group) + except NotExistent: + print(f"The group '{group}' does not exist.") + + run_hts(folder, config, calc, output_filename, code, group, launch) + + +if __name__ == "__main__": + cli() # pylint: disable=no-value-for-parameter diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py new file mode 100644 index 00000000..015a3abb --- /dev/null +++ b/aiida_mlip/workflows/training.py @@ -0,0 +1,65 @@ +from aiida_workgraph import Workgraph, task + +from aiida.engine import submit +from aiida.orm import load_node +from aiida.plugins import CalculationFactory, WorkflowFactory + +from aiida_mlip.helpers.help_load import load_structure + + +# define DFT task +@task.calcfunction() +def submit_DFT(child, dft_inputs, group): + print(child.name) + dft_inputs['metadata']['label']=f"{child.name}" + optcalculation = WorkflowFactory("quantumespresso.pw.relax") + struc = load_structure(child) + dft_inputs['struct']=struc + result = submit(optcalculation, **inputs) + group.add_nodes(load_node(result.pk)) + return group + +#syntax of this wrong +@task.calcfunction() +def create_input(group): + with open("input_file") as input_file: + for node in group: + #get the output structure + structure = node.outputs.structure + #convert it to extxyz + structure.to_ase() + # add to file + input_file.writelines(structure) + return input_file + + +# define traning task +@task.calcfunction() +def training(input_file, train_inputs): + training = CalculationFactory("mlip.train") + #check name of input file in training + train_inputs['xyz_input'] = input_file + future = submit(training, **train_inputs) + return future + + + +wg = WorkGraph("training_workflow") + +for child in folder.glob('**/*'): + if child.name.endswith("cif"): + submitdft_task = wg.tasks.new(submit_DFT, name="submission") + +# link the output of the `add` task to one of the `x` input of the `multiply` task. 
+create_file_task = wg.tasks.new(create_input, name="createinput", group = submitdft_task.outputs["result"]) + +train_task = wg.tasks.new(training, name="training", input_file=create_file_task.outputs['input_file']) + +# export the workgraph to html file so that it can be visualized in a browser +wg.to_html() +# comment out the following line to visualize the workgraph in jupyter-notebook +# wg + +# Set the maximum number of running jobs inside the WorkGraph +wg.max_number_jobs = 10 +wg.submit(wait=True) diff --git a/pyproject.toml b/pyproject.toml index 5fe3ceec..a426e7cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,9 @@ python = "^3.9" aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" -janus-core = "^v0.6.0b0" +janus-core = "^v0.6.2" +aiida-workgraph = "^0.3.7" +#aiida-quantumespresso = "^4.6.0" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From 6df534da3c536f758f8000c6801c37140d4cf2b0 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 19 Jul 2024 09:47:05 +0100 Subject: [PATCH 2/9] minor change --- aiida_mlip/workflows/training.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index 015a3abb..508685bf 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -12,12 +12,12 @@ def submit_DFT(child, dft_inputs, group): print(child.name) dft_inputs['metadata']['label']=f"{child.name}" - optcalculation = WorkflowFactory("quantumespresso.pw.relax") - struc = load_structure(child) - dft_inputs['struct']=struc - result = submit(optcalculation, **inputs) - group.add_nodes(load_node(result.pk)) - return group + optcalculation = WorkflowFactory("quantumespresso.pw.relax") + struc = load_structure(child) + dft_inputs['struct']=struc + result = submit(optcalculation, **dft_inputs) + group.add_nodes(load_node(result.pk)) + return group #syntax of this wrong @task.calcfunction() From 2c861734bbcb423c7a06e2a017d88b35c25cbedc Mon Sep 17 00:00:00 2001 From: federica Date: Mon, 22 Jul 2024 14:16:37 +0100 Subject: [PATCH 3/9] pyproject changes? 
--- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a426e7cd..8705cbfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,10 @@ python = "^3.9" aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" -janus-core = "^v0.6.2" +#janus-core = "^v0.6.2" aiida-workgraph = "^0.3.7" -#aiida-quantumespresso = "^4.6.0" +janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } +aiida-quantumespresso = "^v4.6.0" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From e917a4f2f8141cbbae0f63123ff862160294a270 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 26 Jul 2024 10:25:00 +0100 Subject: [PATCH 4/9] workgraph mostly done but pre-commits fail --- aiida_mlip/workflows/training.py | 303 ++++++++++++++++++++++++++----- pyproject.toml | 3 +- 2 files changed, 259 insertions(+), 47 deletions(-) diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index 508685bf..e1ea65e4 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -1,65 +1,276 @@ -from aiida_workgraph import Workgraph, task +""" Workgraph to run DFT calculations and use the outputs fpr training a MLIP model.""" -from aiida.engine import submit -from aiida.orm import load_node +from pathlib import Path + +from aiida_workgraph import WorkGraph, task +from sklearn.model_selection import train_test_split + +from aiida.orm import Dict, SinglefileData, load_code from aiida.plugins import CalculationFactory, WorkflowFactory +from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure +PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") + + +@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) +def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: + """ + Run a quantumespresso calculation using PwRelaxWorkChain. + + Parameters + ---------- + folder : Path + Path to the folder containing input structure files. + dft_inputs : dict + Dictionary of inputs for the DFT calculations. + + Returns + ------- + WorkGraph + The work graph containing the PW relaxation tasks. + """ + wg = WorkGraph() + for child in folder.glob("**/*xyz"): + structure = load_structure(child) + dft_inputs["base"]["structure"] = structure + dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem + pw_task = wg.add_task( + PwRelaxWorkChain, name=f"pw_relax{child.stem}", **dft_inputs + ) + pw_task.set_context({"result": f"pw_relax_{child}"}) + return wg + -# define DFT task @task.calcfunction() -def submit_DFT(child, dft_inputs, group): - print(child.name) - dft_inputs['metadata']['label']=f"{child.name}" - optcalculation = WorkflowFactory("quantumespresso.pw.relax") - struc = load_structure(child) - dft_inputs['struct']=struc - result = submit(optcalculation, **dft_inputs) - group.add_nodes(load_node(result.pk)) - return group - -#syntax of this wrong +def create_input(**inputs: dict) -> SinglefileData: + """ + Create input files from given structures. + + Parameters + ---------- + **inputs : dict + Dictionary where keys are names and values are structure data. + + Returns + ------- + SinglefileData + A SinglefileData node containing the generated input data. 
+ """ + input_data = [] + for name, structure in inputs.items(): + ase_structure = structure.to_ase() + extxyz_str = ase_structure.write(format="extxyz") + input_data.append(extxyz_str) + temp_file_path = "tmp.extxyz" + with open(temp_file_path, "w") as temp_file: + temp_file.write("\n".join(input_data)) + + file_data = SinglefileData(file=temp_file_path) + + return file_data + + @task.calcfunction() -def create_input(group): - with open("input_file") as input_file: - for node in group: - #get the output structure - structure = node.outputs.structure - #convert it to extxyz - structure.to_ase() - # add to file - input_file.writelines(structure) - return input_file - - -# define traning task +def split_xyz_file(xyz_file: SinglefileData) -> dict: + """ + Split an XYZ file into training, testing, and validation datasets. + + Parameters + ---------- + xyz_file : SinglefileData + A SinglefileData node containing the XYZ file. + + Returns + ------- + dict + A dictionary with keys 'train', 'test', and 'validation', each containing + SinglefileData nodes for the respective datasets. + """ + with xyz_file.open() as file: + lines = file.readlines() + + data = [line.strip() for line in lines if line.strip()] + + train_data, test_validation_data = train_test_split( + data, test_size=0.4, random_state=42 + ) + test_data, validation_data = train_test_split( + test_validation_data, test_size=0.5, random_state=42 + ) + + train_path = "train.extxyz" + test_path = "test.extxyz" + validation_path = "validation.extxyz" + + with open(train_path, "w") as f: + f.write("\n".join(train_data)) + with open(test_path, "w") as f: + f.write("\n".join(test_data)) + with open(validation_path, "w") as f: + f.write("\n".join(validation_data)) + + return { + "train": SinglefileData(file=train_path), + "test": SinglefileData(file=test_path), + "validation": SinglefileData(file=validation_path), + } + + @task.calcfunction() -def training(input_file, train_inputs): - training = CalculationFactory("mlip.train") - #check name of input file in training - train_inputs['xyz_input'] = input_file - future = submit(training, **train_inputs) - return future +def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: + """ + Update the JanusConfigfile with new paths for train, test, and validation datasets. + Parameters + ---------- + janusconfigfile : JanusConfigfile + The original JanusConfigfile. + Returns + ------- + JanusConfigfile + A new JanusConfigfile with updated paths. + """ + janus_dict = janusconfigfile.as_dictionary + config_parse = janusconfigfile.get_content() -wg = WorkGraph("training_workflow") + content = config_parse.replace(janus_dict["train_file"], "train.extxyz") + content = content.replace(janus_dict["test_file"], "test.extxyz") + content = content.replace(janus_dict["train_file"], "validation.extxyz") -for child in folder.glob('**/*'): - if child.name.endswith("cif"): - submitdft_task = wg.tasks.new(submit_DFT, name="submission") + new_config_path = "./config.yml" -# link the output of the `add` task to one of the `x` input of the `multiply` task. 
-create_file_task = wg.tasks.new(create_input, name="createinput", group = submitdft_task.outputs["result"]) + with open(new_config_path, "w") as file: + file.write(content) -train_task = wg.tasks.new(training, name="training", input_file=create_file_task.outputs['input_file']) + return JanusConfigfile(file=new_config_path) -# export the workgraph to html file so that it can be visualized in a browser -wg.to_html() -# comment out the following line to visualize the workgraph in jupyter-notebook -# wg -# Set the maximum number of running jobs inside the WorkGraph +wg = WorkGraph("trainingworkflow") +folder_path = Path("/home/federica/prova_training_wg") +code = load_code("qe-7.1@scarf1") +inputs = { + "base": { + "settings": Dict({"GAMMA_ONLY": True}), + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "vc-relax", + "nstep": 1200, + "etot_conv_thr": 1e-05, + "forc_conv_thr": 1e-04, + }, + "SYSTEM": { + "ecutwfc": 500, + "input_dft": "PBE", + "nspin": 1, + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "electron_maxstep": 1000, + "scf_must_converge": False, + "conv_thr": 1e-08, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + "IONS": { + "ion_dynamics": "bfgs", + }, + "CELL": { + "cell_dynamics": "bfgs", + "cell_dofree": "ibrav", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 4, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 48 * 60 * 60, + }, + }, + }, + }, + "base_final_scf": { + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "scf", + "tprnfor": True, + }, + "SYSTEM": { + "ecutwfc": 70, + "ecutrho": 650, + "input_dft": "PBE", + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "conv_thr": 1e-10, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 1, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 3 * 60 * 60, + }, + }, + }, + }, +} + +pw_task = wg.add_task( + run_pw_calc, name="pw_relax_results", folder=folder_path, dft_inputs=inputs +) + +print("CHECKPOINT1") +create_file_task = wg.add_task(create_input, name="create_input") +wg.add_link(pw_task.outputs[0], create_file_task.inputs[0]) + +print("CHECKPOINT2") +split_files_task = wg.add_task( + split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result +) +print("CHECKPOINT3") +janusconfigfile_path = "/home/federica/prova_training_wg/mlip_train.yml" +janusconfigfile = JanusConfigfile(file=janusconfigfile_path) +update_config_task = wg.add_task( + update_janusconfigfile, + name="update_janusconfigfile", + janusconfigfile=janusconfigfile, +) + +wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"]) +print("CHECKPOINT4") +training_calc = CalculationFactory("mlip.train") +train_inputs = {} +train_inputs["config_file"] = update_config_task.outputs.result +train_task = wg.add_task( + training_calc, name="training", mlip_config=update_config_task.outputs.result +) + +wg.to_html() +print("CHECKPOINT5") wg.max_number_jobs = 10 wg.submit(wait=True) diff --git a/pyproject.toml b/pyproject.toml index 8705cbfe..329d678e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,9 +31,10 @@ aiida-core = "^2.6" ase = "^3.23.0" voluptuous = "^0.14" #janus-core = "^v0.6.2" -aiida-workgraph = 
"^0.3.7" +aiida-workgraph = {extras = ["widget"], version = "^0.3.14"} janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } aiida-quantumespresso = "^v4.6.0" +scikit-learn = "^1.5.1" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.4.1"} From d128b0160f98ee99584e3cfb3f3a9471d42b6e2b Mon Sep 17 00:00:00 2001 From: federicazanca Date: Tue, 30 Jul 2024 13:53:26 +0100 Subject: [PATCH 5/9] change paths --- .../workflows/html/trainingworkflow.html | 258 ++++++++++++++++++ aiida_mlip/workflows/training.py | 10 +- 2 files changed, 263 insertions(+), 5 deletions(-) create mode 100644 aiida_mlip/workflows/html/trainingworkflow.html diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html new file mode 100644 index 00000000..a15f40e4 --- /dev/null +++ b/aiida_mlip/workflows/html/trainingworkflow.html @@ -0,0 +1,258 @@ + + + + + + + Rete.js with React in Vanilla JS + + + + + + + + + + + + + + + + + + + + +
+ + + diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index e1ea65e4..e409864a 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -37,9 +37,9 @@ def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: dft_inputs["base"]["structure"] = structure dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem pw_task = wg.add_task( - PwRelaxWorkChain, name=f"pw_relax{child.stem}", **dft_inputs + PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs ) - pw_task.set_context({"result": f"pw_relax_{child}"}) + pw_task.set_context({"result": f"pw_relax_{child.stem}"}) return wg @@ -149,8 +149,8 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: wg = WorkGraph("trainingworkflow") -folder_path = Path("/home/federica/prova_training_wg") -code = load_code("qe-7.1@scarf1") +folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") +code = load_code("qe-7.1@scarf") inputs = { "base": { "settings": Dict({"GAMMA_ONLY": True}), @@ -253,7 +253,7 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result ) print("CHECKPOINT3") -janusconfigfile_path = "/home/federica/prova_training_wg/mlip_train.yml" +janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml" janusconfigfile = JanusConfigfile(file=janusconfigfile_path) update_config_task = wg.add_task( update_janusconfigfile, From 95fbb81680c075dbe11771a83e1f66d5cfae84a7 Mon Sep 17 00:00:00 2001 From: federicazanca Date: Tue, 30 Jul 2024 15:06:21 +0100 Subject: [PATCH 6/9] ok but entry point not working --- .../workflows/html/trainingworkflow.html | 2 +- aiida_mlip/workflows/training.py | 24 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html index a15f40e4..0dad339b 100644 --- a/aiida_mlip/workflows/html/trainingworkflow.html +++ b/aiida_mlip/workflows/html/trainingworkflow.html @@ -59,7 +59,7 @@ const { RenderUtils } = ReteRenderUtils; const styled = window.styled; - const workgraphData = {"name": "trainingworkflow", "uuid": "ad06a2d4-4e59-11ef-8066-3cecef4478be", "state": "CREATED", "nodes": {"pw_relax_results": {"label": "pw_relax_results", "inputs": [{"name": "folder", "identifier": "Any", "uuid": "ad0ea862-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0ea236-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "dft_inputs", "identifier": "Any", "uuid": "ad0eab82-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0ea236-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}], "outputs": [{"name": "result"}], "position": [30, 30]}, "create_input": {"label": "create_input", "inputs": [{"name": "metadata"}], "outputs": [{"name": "result"}], "position": [60, 60]}, "split_xyz": {"label": "split_xyz", "inputs": [{"name": "xyz_file", "identifier": "Any", "uuid": "ad0f7c1a-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad0f6f86-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "create_input", 
"from_socket": "result", "from_socket_uuid": "ad0f1f7c-4e59-11ef-8066-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "xyz_file"}], "outputs": [{"name": "result"}], "position": [90, 90]}, "update_janusconfigfile": {"label": "update_janusconfigfile", "inputs": [{"name": "janusconfigfile", "identifier": "Any", "uuid": "ad11cbbe-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad11be80-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "_wait"}], "outputs": [{"name": "result"}], "position": [120, 120]}, "training": {"label": "training", "inputs": [{"name": "mlip_config", "identifier": "Any", "uuid": "ad143656-4e59-11ef-8066-3cecef4478be", "node_uuid": "ad13f862-4e59-11ef-8066-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "update_janusconfigfile", "from_socket": "result", "from_socket_uuid": "ad11cd9e-4e59-11ef-8066-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "mlip_config"}], "outputs": [], "position": [150, 150]}}, "links": [{"from_socket": "result", "from_node": "pw_relax_results", "from_socket_uuid": "ad0eae66-4e59-11ef-8066-3cecef4478be", "to_socket": "metadata", "to_node": "create_input", "state": false}, {"from_socket": "result", "from_node": "create_input", "from_socket_uuid": "ad0f1f7c-4e59-11ef-8066-3cecef4478be", "to_socket": "xyz_file", "to_node": "split_xyz", "state": false}, {"from_socket": "result", "from_node": "split_xyz", "from_socket_uuid": "ad0f7eea-4e59-11ef-8066-3cecef4478be", "to_socket": "_wait", "to_node": "update_janusconfigfile", "state": false}, {"from_socket": "result", "from_node": "update_janusconfigfile", "from_socket_uuid": "ad11cd9e-4e59-11ef-8066-3cecef4478be", "to_socket": "mlip_config", "to_node": "training", "state": false}]} + const workgraphData = {"name": "trainingworkflow", "uuid": "91809e08-4e7b-11ef-9a8b-3cecef4478be", "state": "CREATED", "nodes": {"pw_relax": {"label": "pw_relax", "inputs": [{"name": "folder", "identifier": "Any", "uuid": "91885fd0-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "91885bde-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "dft_inputs", "identifier": "Any", "uuid": "9188614c-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "91885bde-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}], "outputs": [{"name": "result"}], "position": [30, 30]}, "create_input": {"label": "create_input", "inputs": [{"name": "inputs"}], "outputs": [{"name": "result"}], "position": [60, 60]}, "split_xyz": {"label": "split_xyz", "inputs": [{"name": "xyz_file", "identifier": "Any", "uuid": "9188e19e-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "9188dab4-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "create_input", "from_socket": "result", "from_socket_uuid": 
"9188a922-4e7b-11ef-9a8b-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "xyz_file"}], "outputs": [{"name": "result"}], "position": [90, 90]}, "update_janusconfigfile": {"label": "update_janusconfigfile", "inputs": [{"name": "janusconfigfile", "identifier": "Any", "uuid": "918a575e-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "918a4ec6-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "_wait"}], "outputs": [{"name": "result"}], "position": [120, 120]}, "training": {"label": "training", "inputs": [{"name": "mlip_config", "identifier": "Any", "uuid": "918b5794-4e7b-11ef-9a8b-3cecef4478be", "node_uuid": "918b2d50-4e7b-11ef-9a8b-3cecef4478be", "type": "INPUT", "link_limit": 1, "links": [{"from_node": "update_janusconfigfile", "from_socket": "result", "from_socket_uuid": "918a58b2-4e7b-11ef-9a8b-3cecef4478be"}], "serialize": {"path": "node_graph.serializer", "name": "serialize_pickle"}, "deserialize": {"path": "node_graph.serializer", "name": "deserialize_pickle"}}, {"name": "mlip_config"}], "outputs": [], "position": [150, 150]}}, "links": [{"from_socket": "result", "from_node": "pw_relax", "from_socket_uuid": "91886322-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "inputs", "to_node": "create_input", "state": false}, {"from_socket": "result", "from_node": "create_input", "from_socket_uuid": "9188a922-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "xyz_file", "to_node": "split_xyz", "state": false}, {"from_socket": "result", "from_node": "split_xyz", "from_socket_uuid": "9188e2b6-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "_wait", "to_node": "update_janusconfigfile", "state": false}, {"from_socket": "result", "from_node": "update_janusconfigfile", "from_socket_uuid": "918a58b2-4e7b-11ef-9a8b-3cecef4478be", "to_socket": "mlip_config", "to_node": "training", "state": false}]} // Define Schemes to use in vanilla JS const Schemes = { diff --git a/aiida_mlip/workflows/training.py b/aiida_mlip/workflows/training.py index e409864a..d5f37fde 100644 --- a/aiida_mlip/workflows/training.py +++ b/aiida_mlip/workflows/training.py @@ -6,13 +6,13 @@ from sklearn.model_selection import train_test_split from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory +from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure +from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain -PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") - +#PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") @task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: @@ -31,18 +31,21 @@ def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: WorkGraph The work graph containing the PW relaxation tasks. 
""" + + print("CHECKPOINT 6") wg = WorkGraph() for child in folder.glob("**/*xyz"): structure = load_structure(child) dft_inputs["base"]["structure"] = structure dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem - pw_task = wg.add_task( - PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs - ) - pw_task.set_context({"result": f"pw_relax_{child.stem}"}) + pw_task = wg.add_task(PwRelaxWorkChain, name=f"pw_relax_{child.stem}") + pw_task.set(dft_inputs) + pw_task.set_context({"final_structure": f"pw.{child.stem}"}) + print("CHECKPOINT 7") return wg + @task.calcfunction() def create_input(**inputs: dict) -> SinglefileData: """ @@ -58,6 +61,7 @@ def create_input(**inputs: dict) -> SinglefileData: SinglefileData A SinglefileData node containing the generated input data. """ + print("CHECKPOINT 8") input_data = [] for name, structure in inputs.items(): ase_structure = structure.to_ase() @@ -88,6 +92,7 @@ def split_xyz_file(xyz_file: SinglefileData) -> dict: A dictionary with keys 'train', 'test', and 'validation', each containing SinglefileData nodes for the respective datasets. """ + print("CHECKPOINT 9") with xyz_file.open() as file: lines = file.readlines() @@ -133,6 +138,7 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: JanusConfigfile A new JanusConfigfile with updated paths. """ + print("CHECKPOINT 10") janus_dict = janusconfigfile.as_dictionary config_parse = janusconfigfile.get_content() @@ -241,12 +247,12 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: } pw_task = wg.add_task( - run_pw_calc, name="pw_relax_results", folder=folder_path, dft_inputs=inputs + run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs ) print("CHECKPOINT1") create_file_task = wg.add_task(create_input, name="create_input") -wg.add_link(pw_task.outputs[0], create_file_task.inputs[0]) +wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"]) print("CHECKPOINT2") split_files_task = wg.add_task( From 34c535b92544db521dd344c0f704ea3b90bafc17 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 31 Jul 2024 14:44:52 +0100 Subject: [PATCH 7/9] fixed workgraph and submission --- .../workflows/html/trainingworkflow.html | 258 ---------------- aiida_mlip/workflows/hts.py | 114 ------- aiida_mlip/workflows/training.py | 282 ------------------ aiida_mlip/workflows/training_workgraph.py | 215 +++++++++++++ examples/workflows/submit_train_wg.py | 103 +++++++ pyproject.toml | 6 +- 6 files changed, 322 insertions(+), 656 deletions(-) delete mode 100644 aiida_mlip/workflows/html/trainingworkflow.html delete mode 100644 aiida_mlip/workflows/hts.py delete mode 100644 aiida_mlip/workflows/training.py create mode 100644 aiida_mlip/workflows/training_workgraph.py create mode 100644 examples/workflows/submit_train_wg.py diff --git a/aiida_mlip/workflows/html/trainingworkflow.html b/aiida_mlip/workflows/html/trainingworkflow.html deleted file mode 100644 index 0dad339b..00000000 --- a/aiida_mlip/workflows/html/trainingworkflow.html +++ /dev/null @@ -1,258 +0,0 @@ - - - - - - - Rete.js with React in Vanilla JS - - - - - - - - - - - - - - - - - - - - -
-
-
-
diff --git a/aiida_mlip/workflows/hts.py b/aiida_mlip/workflows/hts.py
deleted file mode 100644
index 13dca328..00000000
--- a/aiida_mlip/workflows/hts.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Example code for submitting single point calculations."""
-
-import csv
-from pathlib import Path
-import sys
-import time
-
-import click
-
-from aiida.common import NotExistent
-from aiida.engine import run_get_pk, submit
-from aiida.orm import load_code, load_group, load_node
-from aiida.plugins import CalculationFactory
-
-from aiida_mlip.data.config import JanusConfigfile
-from aiida_mlip.data.model import ModelData
-from aiida_mlip.helpers.help_load import load_structure
-
-
-def run_hts(folder, config, calc, output_filename, code, group, launch):
-    # Add the required inputs for aiida
-    metadata = {"options": {"resources": {"num_machines": 1}}}
-
-    # All other parameters are taken from the config file
-    # We want to pass it as an AiiDA data type for the provenance
-    conf = JanusConfigfile(config)
-    # Define calculation to run
-    Calculation = CalculationFactory(f"mlip.{calc}")
-    model = ModelData.download(
-        url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model",
-        cache_dir="models",
-        architecture="mace_mp",
-        filename="small.model",
-    )
-    list_of_nodes = []
-    p = Path(folder)
-    for child in p.glob("**/*"):
-        if child.name.endswith("cif"):
-            print(child.name)
-            metadata["label"] = f"{child.name}"
-            # This structure will overwrite the one in the config file if present
-            structure = load_structure(child.absolute())
-            # Run calculation
-            if launch == "run_get_pk":
-                result, pk = run_get_pk(
-                    Calculation,
-                    code=code,
-                    struct=structure,
-                    metadata=metadata,
-                    config=conf,
-                    model=model,
-                )
-                list_of_nodes.append(pk)
-
-                group.add_nodes(load_node(pk))
-                time.sleep(1)
-                print(f"Printing results from calculation: {result}")
-
-            if launch == "submit":
-                result = submit(
-                    Calculation,
-                    code=code,
-                    struct=structure,
-                    metadata=metadata,
-                    config=conf,
-                    model=model,
-                )
-                list_of_nodes.append(result.pk)
-
-                group.add_nodes(load_node(result.pk))
-
-                print(f"Printing results from calculation: {result}")
-
-    print(f"Printing the list of all nodes: {list_of_nodes}")
-    # write list of nodes in csv file
-    with open(output_filename, "w", newline="") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(["name", "PK"])
-        for node in list_of_nodes:
-            writer.writerow([load_node(node).label, node])
-
-
-@click.command("cli")
-@click.option("--folder", type=Path)
-@click.option(
-    "--config",
-    type=Path,
-    help="Config file to use",
-    default="/work4/scd/scarf1228/config_janus.yaml",
-)
-@click.option("--calc", type=str, help="Calc to run", default="sp")
-@click.option("--output_filename", type=str, default="list_nodes.csv")
-@click.option("--codelabel", type=str, default="janus@scarf-hq")
-@click.option("--group", type=int, default=8)
-@click.option(
-    "--launch", type=str, default="submit", help="can be run_get_pk or submit"
-)
-def cli(folder, config, calc, output_filename, codelabel, group, launch):
-    """Click interface."""
-    try:
-        code = load_code(codelabel)
-    except NotExistent:
-        print(f"The code '{codelabel}' does not exist.")
-        sys.exit(1)
-    try:
-        group = load_group(group)
-    except NotExistent:
-        print(f"The group '{group}' does not exist.")
-
-    run_hts(folder, config, calc, output_filename, code, group, launch)
-
-
-if __name__ == "__main__":
-    cli()  # pylint: disable=no-value-for-parameter
diff --git a/aiida_mlip/workflows/training.py
b/aiida_mlip/workflows/training.py deleted file mode 100644 index d5f37fde..00000000 --- a/aiida_mlip/workflows/training.py +++ /dev/null @@ -1,282 +0,0 @@ -""" Workgraph to run DFT calculations and use the outputs fpr training a MLIP model.""" - -from pathlib import Path - -from aiida_workgraph import WorkGraph, task -from sklearn.model_selection import train_test_split - -from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point - -from aiida_mlip.data.config import JanusConfigfile -from aiida_mlip.helpers.help_load import load_structure -from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain - -#PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax") - -@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}]) -def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph: - """ - Run a quantumespresso calculation using PwRelaxWorkChain. - - Parameters - ---------- - folder : Path - Path to the folder containing input structure files. - dft_inputs : dict - Dictionary of inputs for the DFT calculations. - - Returns - ------- - WorkGraph - The work graph containing the PW relaxation tasks. - """ - - print("CHECKPOINT 6") - wg = WorkGraph() - for child in folder.glob("**/*xyz"): - structure = load_structure(child) - dft_inputs["base"]["structure"] = structure - dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem - pw_task = wg.add_task(PwRelaxWorkChain, name=f"pw_relax_{child.stem}") - pw_task.set(dft_inputs) - pw_task.set_context({"final_structure": f"pw.{child.stem}"}) - print("CHECKPOINT 7") - return wg - - - -@task.calcfunction() -def create_input(**inputs: dict) -> SinglefileData: - """ - Create input files from given structures. - - Parameters - ---------- - **inputs : dict - Dictionary where keys are names and values are structure data. - - Returns - ------- - SinglefileData - A SinglefileData node containing the generated input data. - """ - print("CHECKPOINT 8") - input_data = [] - for name, structure in inputs.items(): - ase_structure = structure.to_ase() - extxyz_str = ase_structure.write(format="extxyz") - input_data.append(extxyz_str) - temp_file_path = "tmp.extxyz" - with open(temp_file_path, "w") as temp_file: - temp_file.write("\n".join(input_data)) - - file_data = SinglefileData(file=temp_file_path) - - return file_data - - -@task.calcfunction() -def split_xyz_file(xyz_file: SinglefileData) -> dict: - """ - Split an XYZ file into training, testing, and validation datasets. - - Parameters - ---------- - xyz_file : SinglefileData - A SinglefileData node containing the XYZ file. - - Returns - ------- - dict - A dictionary with keys 'train', 'test', and 'validation', each containing - SinglefileData nodes for the respective datasets. 
- """ - print("CHECKPOINT 9") - with xyz_file.open() as file: - lines = file.readlines() - - data = [line.strip() for line in lines if line.strip()] - - train_data, test_validation_data = train_test_split( - data, test_size=0.4, random_state=42 - ) - test_data, validation_data = train_test_split( - test_validation_data, test_size=0.5, random_state=42 - ) - - train_path = "train.extxyz" - test_path = "test.extxyz" - validation_path = "validation.extxyz" - - with open(train_path, "w") as f: - f.write("\n".join(train_data)) - with open(test_path, "w") as f: - f.write("\n".join(test_data)) - with open(validation_path, "w") as f: - f.write("\n".join(validation_data)) - - return { - "train": SinglefileData(file=train_path), - "test": SinglefileData(file=test_path), - "validation": SinglefileData(file=validation_path), - } - - -@task.calcfunction() -def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: - """ - Update the JanusConfigfile with new paths for train, test, and validation datasets. - - Parameters - ---------- - janusconfigfile : JanusConfigfile - The original JanusConfigfile. - - Returns - ------- - JanusConfigfile - A new JanusConfigfile with updated paths. - """ - print("CHECKPOINT 10") - janus_dict = janusconfigfile.as_dictionary - config_parse = janusconfigfile.get_content() - - content = config_parse.replace(janus_dict["train_file"], "train.extxyz") - content = content.replace(janus_dict["test_file"], "test.extxyz") - content = content.replace(janus_dict["train_file"], "validation.extxyz") - - new_config_path = "./config.yml" - - with open(new_config_path, "w") as file: - file.write(content) - - return JanusConfigfile(file=new_config_path) - - -wg = WorkGraph("trainingworkflow") -folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") -code = load_code("qe-7.1@scarf") -inputs = { - "base": { - "settings": Dict({"GAMMA_ONLY": True}), - "pw": { - "parameters": Dict( - { - "CONTROL": { - "calculation": "vc-relax", - "nstep": 1200, - "etot_conv_thr": 1e-05, - "forc_conv_thr": 1e-04, - }, - "SYSTEM": { - "ecutwfc": 500, - "input_dft": "PBE", - "nspin": 1, - "occupations": "smearing", - "degauss": 0.001, - "smearing": "m-p", - }, - "ELECTRONS": { - "electron_maxstep": 1000, - "scf_must_converge": False, - "conv_thr": 1e-08, - "mixing_beta": 0.25, - "diago_david_ndim": 4, - "startingpot": "atomic", - "startingwfc": "atomic+random", - }, - "IONS": { - "ion_dynamics": "bfgs", - }, - "CELL": { - "cell_dynamics": "bfgs", - "cell_dofree": "ibrav", - }, - } - ), - "code": code, - "metadata": { - "options": { - "resources": { - "num_machines": 4, - "num_mpiprocs_per_machine": 32, - }, - "max_wallclock_seconds": 48 * 60 * 60, - }, - }, - }, - }, - "base_final_scf": { - "pw": { - "parameters": Dict( - { - "CONTROL": { - "calculation": "scf", - "tprnfor": True, - }, - "SYSTEM": { - "ecutwfc": 70, - "ecutrho": 650, - "input_dft": "PBE", - "occupations": "smearing", - "degauss": 0.001, - "smearing": "m-p", - }, - "ELECTRONS": { - "conv_thr": 1e-10, - "mixing_beta": 0.25, - "diago_david_ndim": 4, - "startingpot": "atomic", - "startingwfc": "atomic+random", - }, - } - ), - "code": code, - "metadata": { - "options": { - "resources": { - "num_machines": 1, - "num_mpiprocs_per_machine": 32, - }, - "max_wallclock_seconds": 3 * 60 * 60, - }, - }, - }, - }, -} - -pw_task = wg.add_task( - run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs -) - -print("CHECKPOINT1") -create_file_task = wg.add_task(create_input, name="create_input") 
-wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"])
-
-print("CHECKPOINT2")
-split_files_task = wg.add_task(
-    split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result
-)
-print("CHECKPOINT3")
-janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml"
-janusconfigfile = JanusConfigfile(file=janusconfigfile_path)
-update_config_task = wg.add_task(
-    update_janusconfigfile,
-    name="update_janusconfigfile",
-    janusconfigfile=janusconfigfile,
-)
-
-wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"])
-print("CHECKPOINT4")
-training_calc = CalculationFactory("mlip.train")
-train_inputs = {}
-train_inputs["config_file"] = update_config_task.outputs.result
-train_task = wg.add_task(
-    training_calc, name="training", mlip_config=update_config_task.outputs.result
-)
-
-wg.to_html()
-print("CHECKPOINT5")
-wg.max_number_jobs = 10
-wg.submit(wait=True)
diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py
new file mode 100644
index 00000000..19b7dc08
--- /dev/null
+++ b/aiida_mlip/workflows/training_workgraph.py
@@ -0,0 +1,215 @@
+"""Workgraph to run DFT calculations and use the outputs for training an MLIP model."""
+
+from pathlib import Path
+
+from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain
+from aiida_workgraph.workgraph import WorkGraph, task
+from ase.io import read
+from sklearn.model_selection import train_test_split
+
+from aiida.orm import Dict, SinglefileData, load_code
+from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point
+
+from aiida_mlip.data.config import JanusConfigfile
+from aiida_mlip.helpers.help_load import load_structure
+
+PwRelaxWorkChain = WorkflowFactory("quantumespresso.pw.relax")
+
+
+@task.graph_builder(outputs=[{"name": "result", "from": "context.pw"}])
+def run_pw_calc(folder: Path, dft_inputs: dict) -> WorkGraph:
+    """
+    Run a quantumespresso calculation using PwRelaxWorkChain.
+
+    Parameters
+    ----------
+    folder : Path
+        Path to the folder containing input structure files.
+    dft_inputs : dict
+        Dictionary of inputs for the DFT calculations.
+
+    Returns
+    -------
+    WorkGraph
+        The work graph containing the PW relaxation tasks.
+    """
+    wg = WorkGraph()
+
+    for child in folder.glob("**/*"):
+        try:
+            read(child.as_posix())
+        except Exception:  # pylint: disable=broad-except
+            continue
+        structure = load_structure(child)
+        dft_inputs["base"]["structure"] = structure
+        dft_inputs["base"]["pw"]["metadata"]["label"] = child.stem
+        pw_task = wg.add_task(
+            PwRelaxWorkChain, name=f"pw_relax_{child.stem}", **dft_inputs
+        )
+        pw_task.set_context({"output_structure": f"pw.{child.stem}"})
+    return wg
+
+
+@task.calcfunction()
+def create_input(**inputs: dict) -> SinglefileData:
+    """
+    Create input files from given structures.
+
+    Parameters
+    ----------
+    **inputs : dict
+        Dictionary where keys are names and values are structure data.
+
+    Returns
+    -------
+    SinglefileData
+        A SinglefileData node containing the generated input data.
+ """ + + input_data = [] + for name, structure in inputs.items(): + ase_structure = structure.to_ase() + extxyz_str = ase_structure.write(format="extxyz") + input_data.append(extxyz_str) + temp_file_path = "tmp.extxyz" + with open(temp_file_path, "w") as temp_file: + temp_file.write("\n".join(input_data)) + + file_data = SinglefileData(file=temp_file_path) + + return file_data + + +@task.calcfunction() +def split_xyz_file(xyz_file: SinglefileData) -> dict: + """ + Split an XYZ file into training, testing, and validation datasets. + + Parameters + ---------- + xyz_file : SinglefileData + A SinglefileData node containing the XYZ file. + + Returns + ------- + dict + A dictionary with keys 'train', 'test', and 'validation', each containing + SinglefileData nodes for the respective datasets. + """ + + with xyz_file.open() as file: + lines = file.readlines() + + data = [line.strip() for line in lines if line.strip()] + + train_data, test_validation_data = train_test_split( + data, test_size=0.4, random_state=42 + ) + test_data, validation_data = train_test_split( + test_validation_data, test_size=0.5, random_state=42 + ) + + train_path = "train.extxyz" + test_path = "test.extxyz" + validation_path = "validation.extxyz" + + with open(train_path, "w") as f: + f.write("\n".join(train_data)) + with open(test_path, "w") as f: + f.write("\n".join(test_data)) + with open(validation_path, "w") as f: + f.write("\n".join(validation_data)) + + return { + "train": SinglefileData(file=train_path), + "test": SinglefileData(file=test_path), + "validation": SinglefileData(file=validation_path), + } + + +@task.calcfunction() +def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: + """ + Update the JanusConfigfile with new paths for train, test, and validation datasets. + + Parameters + ---------- + janusconfigfile : JanusConfigfile + The original JanusConfigfile. + + Returns + ------- + JanusConfigfile + A new JanusConfigfile with updated paths. + """ + print("CHECKPOINT 10") + janus_dict = janusconfigfile.as_dictionary + config_parse = janusconfigfile.get_content() + + content = config_parse.replace(janus_dict["train_file"], "train.extxyz") + content = content.replace(janus_dict["test_file"], "test.extxyz") + content = content.replace(janus_dict["train_file"], "validation.extxyz") + + new_config_path = "./config.yml" + + with open(new_config_path, "w") as file: + file.write(content) + + return JanusConfigfile(file=new_config_path) + + +def TrainWorkGraph( + folder_path: Path, inputs: dict, janusconfigfile: JanusConfigfile +) -> WorkGraph: + """ + Create a workflow for optimising using QE and using the results for training mlips. + + Parameters + ---------- + folder_path : Path + Path to the folder containing input structure files. + inputs : dict + Dictionary of inputs for the calculations. + janusconfigfile : JanusConfigfile + File with inputs for janus calculations. + + Returns + ------- + WorkGraph + The workgraph containing the training workflow. 
+ """ + wg = WorkGraph("trainingworkflow") + + pw_task = wg.add_task( + run_pw_calc, name="pw_relax", folder=folder_path, dft_inputs=inputs + ) + + create_file_task = wg.add_task(create_input, name="create_input") + wg.add_link(pw_task.outputs["result"], create_file_task.inputs["inputs"]) + + split_files_task = wg.add_task( + split_xyz_file, name="split_xyz", xyz_file=create_file_task.outputs.result + ) + + update_config_task = wg.add_task( + update_janusconfigfile, + name="update_janusconfigfile", + janusconfigfile=janusconfigfile, + ) + + wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"]) + + training_calc = CalculationFactory("mlip.train") + train_inputs = {} + train_inputs["config_file"] = update_config_task.outputs.result + train_task = wg.add_task( + training_calc, name="training", mlip_config=update_config_task.outputs.result + ) + wg.group_outputs = [{"name": "opt_structures", "from": "pw_task.output_structures"}] + wg.group_outputs = [{"name": "final_model", "from": "train_task.outputs.model"}] + + wg.to_html() + + wg.max_number_jobs = 10 + wg.submit(wait=True) + return wg diff --git a/examples/workflows/submit_train_wg.py b/examples/workflows/submit_train_wg.py new file mode 100644 index 00000000..bc9b4fd1 --- /dev/null +++ b/examples/workflows/submit_train_wg.py @@ -0,0 +1,103 @@ +"""Example submission for hts workgraph.""" + +from pathlib import Path + +from aiida.orm import Dict, load_code + +from aiida_mlip.data.config import JanusConfigfile +from aiida_mlip.workflows.training_workgraph import TrainWorkGraph + +folder_path = Path("/work4/scd/scarf1228/prova_train_workgraph/") +code = load_code("qe-7.1@scarf") +inputs = { + "base": { + "settings": Dict({"GAMMA_ONLY": True}), + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "vc-relax", + "nstep": 1200, + "etot_conv_thr": 1e-05, + "forc_conv_thr": 1e-04, + }, + "SYSTEM": { + "ecutwfc": 500, + "input_dft": "PBE", + "nspin": 1, + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "electron_maxstep": 1000, + "scf_must_converge": False, + "conv_thr": 1e-08, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + "IONS": { + "ion_dynamics": "bfgs", + }, + "CELL": { + "cell_dynamics": "bfgs", + "cell_dofree": "ibrav", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 4, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 48 * 60 * 60, + }, + }, + }, + }, + "base_final_scf": { + "pw": { + "parameters": Dict( + { + "CONTROL": { + "calculation": "scf", + "tprnfor": True, + }, + "SYSTEM": { + "ecutwfc": 70, + "ecutrho": 650, + "input_dft": "PBE", + "occupations": "smearing", + "degauss": 0.001, + "smearing": "m-p", + }, + "ELECTRONS": { + "conv_thr": 1e-10, + "mixing_beta": 0.25, + "diago_david_ndim": 4, + "startingpot": "atomic", + "startingwfc": "atomic+random", + }, + } + ), + "code": code, + "metadata": { + "options": { + "resources": { + "num_machines": 1, + "num_mpiprocs_per_machine": 32, + }, + "max_wallclock_seconds": 3 * 60 * 60, + }, + }, + }, + }, +} +janusconfigfile_path = "/work4/scd/scarf1228/prova_train_workgraph/mlip_train.yml" +janusconfigfile = JanusConfigfile(file=janusconfigfile_path) + +TrainWorkGraph(folder_path, inputs, janusconfigfile) diff --git a/pyproject.toml b/pyproject.toml index 329d678e..657920dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,8 @@ python = "^3.9" aiida-core = "^2.6" 
ase = "^3.23.0" voluptuous = "^0.14" -#janus-core = "^v0.6.2" +janus-core = "^v0.6.3b0" aiida-workgraph = {extras = ["widget"], version = "^0.3.14"} -janus-core = { git = "https://github.com/stfc/janus-core.git", branch = "main" } aiida-quantumespresso = "^v4.6.0" scikit-learn = "^1.5.1" @@ -83,6 +82,9 @@ build-backend = "poetry.core.masonry.api" "mlip.md_parser" = "aiida_mlip.parsers.md_parser:MDParser" "mlip.train_parser" = "aiida_mlip.parsers.train_parser:TrainParser" +[tool.poetry.plugins."aiida.workflows"] +"mlip.training_wg" = "aiida_mlip.workflows.traning_workgraph:TrainWorkGraph" + [tool.black] line-length = 88 From 1f2389e2197db6400ec9f588044d79f499c6b5f8 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 31 Jul 2024 15:58:13 +0100 Subject: [PATCH 8/9] fix pre-commit? --- aiida_mlip/workflows/training_workgraph.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py index 19b7dc08..3eef6ac2 100644 --- a/aiida_mlip/workflows/training_workgraph.py +++ b/aiida_mlip/workflows/training_workgraph.py @@ -3,12 +3,12 @@ from pathlib import Path from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain -from aiida_workgraph.workgraph import WorkGraph, task +from aiida_workgraph import WorkGraph, task from ase.io import read from sklearn.model_selection import train_test_split -from aiida.orm import Dict, SinglefileData, load_code -from aiida.plugins import CalculationFactory, WorkflowFactory, entry_point +from aiida.orm import SinglefileData +from aiida.plugins import CalculationFactory, WorkflowFactory from aiida_mlip.data.config import JanusConfigfile from aiida_mlip.helpers.help_load import load_structure @@ -67,12 +67,12 @@ def create_input(**inputs: dict) -> SinglefileData: """ input_data = [] - for name, structure in inputs.items(): + for _, structure in inputs.items(): ase_structure = structure.to_ase() extxyz_str = ase_structure.write(format="extxyz") input_data.append(extxyz_str) temp_file_path = "tmp.extxyz" - with open(temp_file_path, "w") as temp_file: + with open(temp_file_path, "w", encoding="utf8") as temp_file: temp_file.write("\n".join(input_data)) file_data = SinglefileData(file=temp_file_path) @@ -113,11 +113,11 @@ def split_xyz_file(xyz_file: SinglefileData) -> dict: test_path = "test.extxyz" validation_path = "validation.extxyz" - with open(train_path, "w") as f: + with open(train_path, "w", encoding="utf8") as f: f.write("\n".join(train_data)) - with open(test_path, "w") as f: + with open(test_path, "w", encoding="utf8") as f: f.write("\n".join(test_data)) - with open(validation_path, "w") as f: + with open(validation_path, "w", encoding="utf8") as f: f.write("\n".join(validation_data)) return { @@ -152,12 +152,13 @@ def update_janusconfigfile(janusconfigfile: JanusConfigfile) -> JanusConfigfile: new_config_path = "./config.yml" - with open(new_config_path, "w") as file: + with open(new_config_path, "w", encoding="utf8") as file: file.write(content) return JanusConfigfile(file=new_config_path) +# pylint: disable=unused-variable def TrainWorkGraph( folder_path: Path, inputs: dict, janusconfigfile: JanusConfigfile ) -> WorkGraph: @@ -202,6 +203,7 @@ def TrainWorkGraph( training_calc = CalculationFactory("mlip.train") train_inputs = {} train_inputs["config_file"] = update_config_task.outputs.result + train_task = wg.add_task( training_calc, name="training", mlip_config=update_config_task.outputs.result ) From 
From 3deb4ff024fe13c0f3b22b72a1411c8f3b08d987 Mon Sep 17 00:00:00 2001
From: Alin Marin Elena
Date: Wed, 20 Nov 2024 09:09:18 +0000
Subject: [PATCH 9/9] Apply suggestions from code review

Co-authored-by: Xing Wang
---
 aiida_mlip/workflows/training_workgraph.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/aiida_mlip/workflows/training_workgraph.py b/aiida_mlip/workflows/training_workgraph.py
index 3eef6ac2..f11351be 100644
--- a/aiida_mlip/workflows/training_workgraph.py
+++ b/aiida_mlip/workflows/training_workgraph.py
@@ -80,7 +80,10 @@ def create_input(**inputs: dict) -> SinglefileData:
     return file_data
 
 
-@task.calcfunction()
+@task.calcfunction(outputs = [{"name": "train"},
+                              {"name": "test"},
+                              {"name": "validation"}
+                              ])
 def split_xyz_file(xyz_file: SinglefileData) -> dict:
     """
     Split an XYZ file into training, testing, and validation datasets.
@@ -198,7 +201,7 @@ def TrainWorkGraph(
         janusconfigfile=janusconfigfile,
     )
 
-    wg.add_link(split_files_task.outputs["result"], update_config_task.inputs["_wait"])
+    wg.add_link(split_files_task.outputs["_wait"], update_config_task.inputs["_wait"])
 
     training_calc = CalculationFactory("mlip.train")
     train_inputs = {}
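
A closing note on create_input as it stands at the end of the series: ASE's Atoms.write requires a file target as its first argument, so ase_structure.write(format="extxyz") raises a TypeError rather than returning extxyz text. Below is a minimal sketch of one way to collect the structures into a single multi-frame extxyz SinglefileData using ase.io.write instead; structures_to_extxyz is a hypothetical helper, not part of these patches, and it assumes each input node exposes to_ase() as in the code above.

    from io import StringIO

    from ase.io import write as ase_write

    from aiida.orm import SinglefileData


    def structures_to_extxyz(**structures) -> SinglefileData:
        """Gather structure nodes into one multi-frame extxyz SinglefileData."""
        buffer = StringIO()
        # ase.io.write accepts an open file object and a list of Atoms;
        # each Atoms object becomes one frame of the extxyz stream.
        ase_write(
            buffer, [node.to_ase() for node in structures.values()], format="extxyz"
        )
        buffer.seek(0)
        return SinglefileData(file=buffer, filename="input.extxyz")

Writing to an in-memory buffer also avoids leaving the tmp.extxyz intermediate file behind in the working directory.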