diff --git a/environment-workflows.yml b/environment-workflows.yml index 50a1caa..59c3573 100644 --- a/environment-workflows.yml +++ b/environment-workflows.yml @@ -1,4 +1,4 @@ -name: workflow-rdf-v0.1 +name: workflow-rdf-v0.2 channels: - conda-forge dependencies: diff --git a/environment.yml b/environment.yml index 32ab4ae..e6ca18c 100644 --- a/environment.yml +++ b/environment.yml @@ -16,3 +16,6 @@ dependencies: - owlready2 - plotly - ipywidgets + - sqlalchemy + - pip: + - "git+https://github.com/RDFLib/rdflib-sqlalchemy.git@develop" diff --git a/pyscal_rdf/__init__.py b/pyscal_rdf/__init__.py index 33f0026..b8764a2 100644 --- a/pyscal_rdf/__init__.py +++ b/pyscal_rdf/__init__.py @@ -1,2 +1,3 @@ from pyscal_rdf.graph import KnowledgeGraph -from pyscal_rdf.structure import System \ No newline at end of file +from pyscal_rdf.structure import System +from pyscal_rdf.workflow.workflow import Workflow \ No newline at end of file diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index bf0cbcb..1f7864a 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -5,8 +5,6 @@ """ from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS -from rdflib.store import NO_STORE, VALID_STORE -from rdflib import plugin import os import numpy as np @@ -26,9 +24,12 @@ from pyscal_rdf.network.ontology import read_ontology from pyscal_rdf.structure import System import pyscal_rdf.properties as prp +from pyscal_rdf.stores import create_store + #from pyscal3.core import System from pyscal3.atoms import Atoms + CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/") PLDO = Namespace("http://purls.helmholtz-metadaten.de/pldo/") PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/") @@ -70,12 +71,6 @@ def _replace_keys(refdict, indict): refdict[key] = val return refdict -def _setup_structure_store(structure_store): - if structure_store is None: - structure_store = os.path.join(os.getcwd(), 'rdf_structure_store') - if not os.path.exists(structure_store): - os.mkdir(structure_store) - return structure_store class KnowledgeGraph: def __init__(self, graph_file=None, @@ -85,36 +80,12 @@ def __init__(self, graph_file=None, ontology=None, structure_store=None): - self.store_file = store_file - self.structure_store = structure_store - - - if store == "Memory": - self.graph = Graph(store="Memory", identifier=identifier) - - - elif store=="SQLAlchemy": - #check for modules - try: - import sqlalchemy as sa - except ImportError: - raise RuntimeError('Please install the sqlalchemy package') - try: - import rdflib_sqlalchemy as rsa - except ImportError: - raise RuntimeError('Please install the rdllib-sqlalchemy package. The development version is needed, please do pip install git+https://github.com/RDFLib/rdflib-sqlalchemy.git@develop') - - if store_file is None: - raise ValueError("store file is needed if store is not memory") - - self.graph = Graph(store="SQLAlchemy", identifier=identifier) - uri = Literal(f"sqlite:///{store_file}") - self.graph.open(uri, create=True) - else: - raise ValueError("Memory or SQLAlchemy") + + create_store(self, store, identifier, + store_file=store_file, + structure_store=structure_store) #start the storage - self.structure_store = _setup_structure_store(self.structure_store) #start binding self.graph.bind("cmso", CMSO) @@ -135,7 +106,11 @@ def __init__(self, graph_file=None, self._atom_ids = None self.store = store - + + def add_structure(self, structure): + structure.graph = self + structure.to_graph() + def add(self, triple): if str(triple[2].toPython()) != 'None': self.graph.add(triple) diff --git a/pyscal_rdf/stores.py b/pyscal_rdf/stores.py new file mode 100644 index 0000000..2099ebc --- /dev/null +++ b/pyscal_rdf/stores.py @@ -0,0 +1,64 @@ +from rdflib.store import NO_STORE, VALID_STORE +from rdflib import plugin +from rdflib import Graph, Literal + +import os +#special methods; for supporting workflow envs +from pyscal_rdf.workflow import inform_graph + + +def create_store(kg, store, identifier, + store_file=None, + structure_store=None): + + kg.store_file = store_file + if store == 'Memory': + store_memory(kg, store, identifier, store_file=store_file, structure_store=structure_store) + elif store == 'SQLAlchemy': + store_alchemy(kg, store, identifier, store_file=store_file, structure_store=structure_store) + elif type(store).__name__ == 'Project': + store_pyiron(kg, store, identifier, store_file=store_file, structure_store=structure_store) + else: + raise ValueError('Unknown store found!') + + +def store_memory(kg, store, identifier, store_file=None, structure_store=None): + graph = Graph(store="Memory", identifier=identifier) + kg.graph = graph + kg.structure_store = _setup_structure_store(structure_store=structure_store) + +def store_alchemy(kg, store, identifier, store_file=None, structure_store=None): + _check_if_sqlalchemy_is_available() + if store_file is None: + raise ValueError("store file is needed if store is not memory") + + kg.graph = Graph(store="SQLAlchemy", identifier=identifier) + uri = Literal(f"sqlite:///{store_file}") + kg.graph.open(uri, create=True) + kg.structure_store = _setup_structure_store(structure_store=structure_store) + + +def store_pyiron(kg, store, identifier, store_file=None, structure_store=None): + structure_store = os.path.join(store.path, 'rdf_structure_store') + kg.structure_store = _setup_structure_store(structure_store=structure_store) + store_file = os.path.join(store.path, f'{store.name}.db') + store_alchemy(kg, store, identifier, store_file, structure_store=structure_store) + #finally update project object + inform_graph(store, kg) + +def _check_if_sqlalchemy_is_available(): + try: + import sqlalchemy as sa + except ImportError: + raise RuntimeError('Please install the sqlalchemy package') + try: + import rdflib_sqlalchemy as rsa + except ImportError: + raise RuntimeError('Please install the rdllib-sqlalchemy package. The development version is needed, please do pip install git+https://github.com/RDFLib/rdflib-sqlalchemy.git@develop') + +def _setup_structure_store(structure_store=None): + if structure_store is None: + structure_store = os.path.join(os.getcwd(), 'rdf_structure_store') + if not os.path.exists(structure_store): + os.mkdir(structure_store) + return structure_store diff --git a/pyscal_rdf/structure.py b/pyscal_rdf/structure.py index d94f721..ce501cb 100644 --- a/pyscal_rdf/structure.py +++ b/pyscal_rdf/structure.py @@ -42,7 +42,7 @@ def _make_crystal(structure, element=None, primitive=False, graph=None, - names=True): + names=False): atoms, box, sdict = pcs.make_crystal(structure, lattice_constant=lattice_constant, @@ -70,7 +70,7 @@ def _make_general_lattice(positions, noise = 0, element=None, graph=None, - names=True): + names=False): atoms, box, sdict = pcs.general_lattice(positions, types, @@ -98,7 +98,7 @@ def _make_grain_boundary(axis, repetitions = (1,1,1), overlap=0.0, graph=None, - names=True): + names=False): gb = GrainBoundary() gb.create_grain_boundary(axis=axis, sigma=sigma, @@ -194,7 +194,7 @@ def __init__(self, filename = None, species = None, source=None, graph=None, - names=True): + names=False): super().__init__(filename = filename, format = format, @@ -543,7 +543,7 @@ def _add_lattice_properties(self): """ data = self.schema.material.crystal_structure.unit_cell.lattice_parameter() lattice_parameter = URIRef(f'{self._name}_LatticeParameter') - self.graph.add((self.unit_cell, CMSO.hasLatticeParamter, lattice_parameter)) + self.graph.add((self.unit_cell, CMSO.hasLatticeParameter, lattice_parameter)) self.graph.add((lattice_parameter, RDF.type, CMSO.LatticeParameter)) self.graph.add((lattice_parameter, CMSO.hasLength_x, Literal(data[0], datatype=XSD.float))) self.graph.add((lattice_parameter, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) diff --git a/pyscal_rdf/visualize.py b/pyscal_rdf/visualize.py index 102ed01..281bd1c 100644 --- a/pyscal_rdf/visualize.py +++ b/pyscal_rdf/visualize.py @@ -99,9 +99,10 @@ def visualize_graph(g, #we collapse sample information #if cmso.connector is found, only use it is it is cmso.hasCalculated #all sub sample props, indicated by sample_x_jsjsj will be ignored. + green_list = ["hasCalculatedProperty", "wasCalculatedBy", "hasValue"] ssplit = string3.split('.') if (len(ssplit) == 2): - if (ssplit[0] == 'cmso') and (ssplit[1] != "hasCalculatedProperty"): + if (ssplit[0] == 'cmso') and (ssplit[1] not in green_list): plot = False if string3 == 'subClassOf': plot = False diff --git a/pyscal_rdf/workflow.py b/pyscal_rdf/workflow.py deleted file mode 100644 index 69dc275..0000000 --- a/pyscal_rdf/workflow.py +++ /dev/null @@ -1,185 +0,0 @@ -""" -Workflows aspects for non-automated annotation of structures. - -""" - -from pyscal_rdf.rdfsystem import System -from pyscal_rdf import StructureGraph -from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS - -import warnings -import numpy as np -import os -import copy -import ast -import uuid - -PROV = Namespace("http://www.w3.org/ns/prov#") -CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/") -PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/") -ASO = Namespace("http://purls.helmholtz-metadaten.de/aso/") - -def annotate_md(graph, - structure, - id = None, - pressure = None, - ensemble = None, - temperature = None, - potential_doi = None, - potential_type = "", - software_id = None, - software = None, - ): - """ - Annotate a given structure with MD simulation details - """ - main_id = str(uuid.uuid4()) - if id is not None: - main_id = id - - sample = structure.sample - graph.add((sample, RDF.type, PROV.Entity)) - - activity = URIRef(f'activity:{main_id}') - graph.add((activity, RDF.type, PROV.Activity)) - graph.add((activity, RDF.type, ASO.StructureOptimization)) - - method = URIRef(f'method:{main_id}') - graph.add((method, RDF.type, ASO.MolecularDynamics)) - graph.add((activity, ASO.hasMethod, method)) - graph.add((activity, ASO.hasRelaxationDOF, ASO.AtomicPosition)) - - if pressure is None: - pass - elif np.isscalar(pressure): - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - else: - #check if pressure is hydrostatic or not - axial_all_alike = None not in pressure[:3] and np.allclose( - pressure[:3], pressure[0] - ) - shear_all_none = all(p is None for p in pressure[3:]) - shear_all_zero = None not in pressure[3:] and np.allclose(pressure[3:], 0) - hydrostatic = axial_all_alike and (shear_all_none or shear_all_zero) - if hydrostatic: - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - else: - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellShape)) - - if ensemble is not None: - if ensemble == 'NVT': - graph.add((method, ASO.hasStatisticalEnsemble, ASO.NVT)) - if temperature is None: - raise ValueError('Temperature cannot be None in NVT') - temperature_node = URIRef(f'temperature:{main_id}') - graph.add((temperature_node, RDF.type, ASO.InputParameter)) - graph.add((temperature_node, RDFS.label, Literal('temperature', datatype=XSD.string))) - graph.add((activity, ASO.hasInputParameter, temperature_node)) - graph.add((temperature_node, ASO.hasValue, Literal(temperature, datatype=XSD.float))) - graph.add((temperature_node, ASO.hasUnit, URIRef('http://qudt.org/vocab/unit/K'))) - - elif ensemble == 'NPT': - graph.add((method, ASO.hasStatisticalEnsemble, ASO.NPT)) - if temperature is None: - raise ValueError('Temperature cannot be None in NPT') - temperature_node = URIRef(f'temperature:{main_id}') - graph.add((temperature_node, RDF.type, ASO.InputParameter)) - graph.add((temperature_node, RDFS.label, Literal('temperature', datatype=XSD.string))) - graph.add((activity, ASO.hasInputParameter, temperature_node)) - graph.add((temperature_node, ASO.hasValue, Literal(temperature, datatype=XSD.float))) - graph.add((temperature_node, ASO.hasUnit, URIRef('http://qudt.org/vocab/unit/K'))) - - pressure_node = URIRef(f'pressure:{main_id}') - graph.add((pressure_node, RDF.type, ASO.InputParameter)) - graph.add((pressure_node, RDFS.label, Literal('pressure', datatype=XSD.string))) - graph.add((activity, ASO.hasInputParameter, pressure_node)) - graph.add((pressure_node, ASO.hasValue, Literal(pressure, datatype=XSD.float))) - graph.add((pressure_node, ASO.hasUnit, URIRef('http://qudt.org/vocab/unit/GigaPA'))) - - graph.add((sample, PROV.wasGeneratedBy, activity)) - - if potential_doi is None: - warnings.warn('potential_doi is None, maybe consider providing it?') - else: - potential = URIRef(f'potential:{main_id}') - - if 'meam' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.MEAM)) - elif 'eam' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.EAM)) - elif 'lj' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.LennardJones)) - elif 'ace' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.MLPotential)) - elif 'snap' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.MLPotential)) - elif 'tersoff' in potential_type.lower(): - graph.add((potential, RDF.type, ASO.Tersoff)) - else: - graph.add((potential, RDF.type, ASO.InteratomicPotential)) - - graph.add((potential, ASO.hasReference, Literal(potential_doi))) - graph.add((method, ASO.hasInteratomicPotential, potential)) - - if software_id is not None: - software_agent = URIRef(software_id) - graph.add((software_agent, RDF.type, PROV.SoftwareAgent)) - graph.add((activity, PROV.wasAssociatedWith, software_agent)) - if software is not None: - graph.add((software_agent, RDFS.label, Literal(software))) - -def annotate_dft(graph, - structure, - id = None, - pressure = None, - software_id = None, - software = None, - ): - """ - Annotate a given structure with MD simulation details - """ - main_id = str(uuid.uuid4()) - if id is not None: - main_id = id - - sample = structure.sample - graph.add((sample, RDF.type, PROV.Entity)) - - activity = URIRef(f'activity:{main_id}') - graph.add((activity, RDF.type, PROV.Activity)) - graph.add((activity, RDF.type, ASO.StructureOptimization)) - - method = URIRef(f'method:{main_id}') - graph.add((method, RDF.type, ASO.DensityFunctionalTheory)) - graph.add((activity, ASO.hasMethod, method)) - graph.add((activity, ASO.hasRelaxationDOF, ASO.AtomicPosition)) - - if pressure is None: - pass - elif np.isscalar(pressure): - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - else: - #check if pressure is hydrostatic or not - axial_all_alike = None not in pressure[:3] and np.allclose( - pressure[:3], pressure[0] - ) - shear_all_none = all(p is None for p in pressure[3:]) - shear_all_zero = None not in pressure[3:] and np.allclose(pressure[3:], 0) - hydrostatic = axial_all_alike and (shear_all_none or shear_all_zero) - if hydrostatic: - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - else: - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellVolume)) - graph.add((activity, ASO.hasRelaxationDOF, ASO.CellShape)) - - - graph.add((sample, PROV.wasGeneratedBy, activity)) - - if software_id is not None: - software_agent = URIRef(software_id) - graph.add((software_agent, RDF.type, PROV.SoftwareAgent)) - graph.add((activity, PROV.wasAssociatedWith, software_agent)) - if software is not None: - graph.add((software_agent, RDFS.label, Literal(software))) - diff --git a/pyscal_rdf/workflow/__init__.py b/pyscal_rdf/workflow/__init__.py new file mode 100644 index 0000000..67063bf --- /dev/null +++ b/pyscal_rdf/workflow/__init__.py @@ -0,0 +1,2 @@ +from pyscal_rdf.workflow.pyiron import inform_graph + diff --git a/pyscal_rdf/workflow/pyiron.py b/pyscal_rdf/workflow/pyiron.py new file mode 100644 index 0000000..d1cf6f8 --- /dev/null +++ b/pyscal_rdf/workflow/pyiron.py @@ -0,0 +1,240 @@ +""" +Wrappers for pyiron jobs +""" +import os +import numpy as np +from functools import partial, update_wrapper +import pyscal_rdf.workflow.workflow as wf +from pyscal_rdf.structure import _make_crystal +from pyscal_rdf.structure import System +from pyscal3.core import structure_dict, element_dict +import ast + +def _check_if_job_is_valid(job): + valid_jobs = ['Lammps', ] + + if not type(job).__name__ in valid_jobs: + raise TypeError('These type of pyiron Job is not currently supported') + + +def _add_structures(job): + initial_pyiron_structure = job.structure + final_pyiron_structure = job.get_structure(frame=-1) + initial_pyscal_structure = System.read.ase(initial_pyiron_structure) + + initial_sample_id = None + if 'sample_id' in initial_pyiron_structure.info.keys(): + initial_sample_id = initial_pyiron_structure.info['sample_id'] + #add final structure + final_pyscal_structure = System.read.ase(final_pyiron_structure) + + #now we do rthe transfer + return initial_pyscal_structure, initial_sample_id, final_pyscal_structure, None + + +def _identify_method(job): + job_dict = job.input.to_dict() + input_dict = {job_dict['control_inp/data_dict']['Parameter'][x]:job_dict['control_inp/data_dict']['Value'][x] for x in range(len(job_dict['control_inp/data_dict']['Parameter']))} + dof = [] + temp = None + press = None + md_method = None + ensemble = None + + if 'min_style' in input_dict.keys(): + dof.append('AtomicPosition') + dof.append('CellVolume') + md_method = 'MolecularStatics' + + elif 'nve' in input_dict['fix___ensemble']: + if int(input_dict['run']) == 0: + method = 'static' + md_method = 'MolecularStatics' + ensemble = 'NVE' + + elif int(input_dict['run']) > 0: + method = 'md_nve' + dof.append('AtomicPosition') + md_method = 'MolecularDynamics' + ensemble = 'NVE' + + + elif 'nvt' in input_dict['fix___ensemble']: + method = 'md_nvt' + raw = input_dict['fix___ensemble'].split() + temp = float(raw[3]) + dof.append('AtomicPosition') + md_method = 'MolecularDynamics' + ensemble = 'NVT' + + elif 'npt' in input_dict['fix___ensemble']: + dof.append('AtomicPosition') + dof.append('CellVolume') + if 'aniso' in input_dict['fix___ensemble']: + method = 'md_npt_aniso' + dof.append('CellShape') + else: + method = 'md_npt_iso' + md_method = 'MolecularDynamics' + raw = input_dict['fix___ensemble'].split() + temp = float(raw[3]) + press = float(raw[7]) + ensemble = 'NPT' + + mdict = {} + mdict['md'] = {} + mdict['md']['method'] = md_method + mdict['md']['temperature'] = temp + mdict['md']['pressure'] = press + mdict['md']['dof'] = dof + mdict['md']['ensemble'] = ensemble + mdict['md']['id'] = job.id + + #now process potential + inpdict = job.input.to_dict() + ps = inpdict['potential_inp/data_dict']['Value'][0] + name = inpdict['potential_inp/potential/Name'] + potstr = job.input.to_dict()['potential_inp/potential/Citations'] + potdict = ast.literal_eval(potstr[1:-1]) + url = None + if 'url' in potdict[list(potdict.keys())[0]].keys(): + url = potdict[list(potdict.keys())[0]]['url'] + + mdict['md']['potential'] = {} + mdict['md']['potential']['type'] = ps + mdict['md']['potential']['label'] = name + if url is not None: + mdict['md']['potential']['uri'] = url + else: + mdict['md']['potential']['uri'] = name + + + mdict['md']['workflow_manager'] = {} + mdict['md']['workflow_manager']['uri'] = "http://demo.fiz-karlsruhe.de/matwerk/E457491" + mdict['md']['workflow_manager']['label'] = "pyiron" + #and finally code details + + + software = {'uri':"http://demo.fiz-karlsruhe.de/matwerk/E447986", + 'label':'LAMMPS'} + mdict['md']['software'] = [software] + + #finally add calculated quantities + quantdict = extract_calculated_quantities(job) + mdict['md']['outputs'] = quantdict + return mdict + + +def extract_calculated_quantities(job): + aen = np.mean(job.output.energy_tot) + avol = np.mean(job.output.volume) + outdict = {} + outdict['TotalEnergy'] = {} + outdict['TotalEnergy']['value'] = np.round(aen, decimals=4) + outdict['TotalEnergy']['unit'] = 'EV' + outdict['TotalEnergy']['associate_to_sample'] = True + + + outdict['TotalVolume'] = {} + outdict['TotalVolume']['value'] = np.round(avol, decimals=4) + outdict['TotalVolume']['unit'] = 'ANGSTROM3' + outdict['TotalVolume']['associate_to_sample'] = True + + return outdict + + + +def inform_graph(pr, kg): + """ + Update project to add extra creator functions + """ + + try: + from pyiron_base import Creator, PyironFactory + from pyiron_atomistics.atomistics.structure.atoms import ase_to_pyiron, pyiron_to_ase + import pyiron_atomistics.atomistics.structure.factory as sf + except ImportError: + raise ImportError('Please install pyiron_base and pyiron_atomistics') + + class AnnotatedStructureFactory: + def __init__(self, graph): + self._graph = graph + + def bulk(self, + element, + repetitions=None, + crystalstructure=None, + a=None, + covera=None, + cubic=True, + graph=None): + + if crystalstructure is None: + crystalstructure = element_dict[element]['structure'] + if a is None: + a = element_dict[element]['lattice_constant'] + + struct = _make_crystal(crystalstructure, + repetitions=repetitions, + lattice_constant=a, + ca_ratio = covera, + element = element, + primitive = not cubic, + graph=self._graph, + ) + + ase_structure = struct.write.ase() + pyiron_structure = ase_to_pyiron(ase_structure) + pyiron_structure.info['sample_id'] = struct.sample + return pyiron_structure + + def grain_boundary(self, + element, + axis, + sigma, + gb_plane, + repetitions = (1,1,1), + crystalstructure=None, + a=1, + overlap=0.0, + graph=None, + ): + + struct = self._graph._annotated_make_grain_boundary(axis, + sigma, + gb_plane, + structure = crystalstructure, + element=element, + lattice_constant=a, + repetitions=repetitions, + overlap=overlap, + graph=self._graph) + + ase_structure = struct.write.ase() + pyiron_structure = ase_to_pyiron(ase_structure) + pyiron_structure.info['sample_id'] = struct.sample + return pyiron_structure + + + class StructureFactory(sf.StructureFactory): + def __init__(self, graph): + super().__init__() + self._annotated_structure = AnnotatedStructureFactory(graph) + + @property + def annotated_structure(self): + return self._annotated_structure + + + class StructureCreator(Creator): + def __init__(self, project): + super().__init__(project) + self._structure = StructureFactory(project.graph) + + @property + def structure(self): + return self._structure + + pr.graph = kg + pr._creator = StructureCreator(pr) + \ No newline at end of file diff --git a/pyscal_rdf/workflow/workflow.py b/pyscal_rdf/workflow/workflow.py new file mode 100644 index 0000000..1e29788 --- /dev/null +++ b/pyscal_rdf/workflow/workflow.py @@ -0,0 +1,296 @@ +""" +Workflows aspects for non-automated annotation of structures. + +This consists of a workflow class which implements the necessary methods to serialise triples as needed. +Custom workflow solutions can be implemented. An example available here is pyiron. +The custom workflow env should implement the following functions: + +_check_if_job_is_valid +_add_structure +_identify_method +extract_calculated_properties +inform_graph + +See pyscal_rdf.workflow.pyiron for more details +""" + +from pyscal_rdf.structure import System +from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS + +import warnings +import numpy as np +import os +import copy +import ast +import uuid + +#Move imports to another file +PROV = Namespace("http://www.w3.org/ns/prov#") +CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/") +PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/") +ASO = Namespace("http://purls.helmholtz-metadaten.de/aso/") + +#custom imports as needed +import pyscal_rdf.workflow.pyiron as pi + + +class Workflow: + def __init__(self, kg, + environment='pyiron'): + """ + Initialize the workflow environment + + Parameters + ---------- + kg: pyscal-rdf KnowledgeGraph + environment: string + the workflow environment. This is used to import the necessary functions. + + """ + self.kg = kg + if environment == 'pyiron': + self.wenv = pi + else: + raise ValueError('unknow workflow environment') + + def _prepare_job(self, workflow_object): + self.wenv._check_if_job_is_valid(workflow_object) + parent_structure, parent_sample, structure, sample = self.wenv._add_structures(workflow_object) + method_dict = self.wenv._identify_method(workflow_object) + + if (structure is None) and (sample is None): + raise ValueError('Either structure or sample should be specified') + + if sample is None: + #its not added to graph yet + structure.graph = self.kg + structure.to_graph() + sample = structure.sample + + if parent_sample is None: + #its not added to graph yet + parent_structure.graph = self.kg + parent_structure.to_graph() + parent_sample = parent_structure.sample + + self.sample = sample + self.mdict = method_dict + self.parent_sample = parent_sample + + def _add_inherited_properties(self, ): + #Here we need to add inherited info: CalculatedProperties will be lost + #Defects will be inherited + if self.parent_sample is None: + return + + parent_material = list([k[2] for k in self.kg.graph.triples((self.parent_sample, CMSO.hasMaterial, None))])[0] + parent_defects = list([x[2] for x in self.kg.graph.triples((parent_material, CMSO.hasDefect, None))]) + #now for each defect we copy add this to the final sample + material = list([k[2] for k in self.kg.graph.triples((self.sample, CMSO.hasMaterial, None))])[0] + + for defect in parent_defects: + new_defect = URIRef(defect.toPython()) + self.kg.graph.add((material, CMSO.hasDefect, new_defect)) + #now fetch all defect based info + for triple in self.kg.graph.triples((defect, None, None)): + self.kg.graph.add((new_defect, triple[1], triple[2])) + + #now add the special props for vacancy + parent_simcell = self.kg.graph.value(self.sample, CMSO.hasSimulationCell) + simcell = self.kg.graph.value(self.parent_sample, CMSO.hasSimulationCell) + + for triple in self.kg.graph.triples((parent_simcell, PODO.hasVacancyConcentration, None)): + self.kg.graph.add((simcell, triple[1], triple[2])) + for triple in self.kg.graph.triples((parent_simcell, PODO.hasNumberOfVacancies, None)): + self.kg.graph.add((simcell, triple[1], triple[2])) + + def _get_lattice_properties(self, ): + if self.parent_sample is None: + return + + parent_material = list([k[2] for k in self.kg.graph.triples((self.parent_sample, CMSO.hasMaterial, None))])[0] + parent_crystal_structure = self.kg.graph.value(parent_material, CMSO.hasStructure) + parent_altname = self.kg.graph.value(parent_crystal_structure, CMSO.hasAltName) + + #add this to new structure + material = list([k[2] for k in self.kg.graph.triples((self.sample, CMSO.hasMaterial, None))])[0] + crystal_structure = self.kg.graph.value(material, CMSO.hasStructure) + self.kg.add((crystal_structure, CMSO.hasAltName, parent_altname)) + + #space group + parent_space_group = self.kg.graph.value(parent_crystal_structure, CMSO.hasSpaceGroup) + space_group = self.kg.graph.value(crystal_structure, CMSO.hasSpaceGroup) + for triple in self.kg.graph.triples((parent_space_group, None, None)): + self.kg.graph.add((space_group, triple[1], triple[2])) + + #unit cell + parent_unit_cell = self.kg.graph.value(parent_crystal_structure, CMSO.hasUnitCell) + parent_bv = self.kg.graph.value(parent_unit_cell, CMSO.hasBravaisLattice) + + unit_cell = self.kg.graph.value(crystal_structure, CMSO.hasUnitCell) + self.kg.graph.add((unit_cell, CMSO.hasBravaisLattice, parent_bv)) + + #lattice parameter + parent_lattice_parameter = self.kg.graph.value(parent_unit_cell, CMSO.hasLatticeParameter) + lattice_parameter = self.kg.graph.value(unit_cell, CMSO.hasLatticeParameter) + for triple in self.kg.graph.triples((parent_lattice_parameter, None, None)): + self.kg.graph.add((lattice_parameter, triple[1], triple[2])) + + #lattice angle + parent_lattice_angle = self.kg.graph.value(parent_unit_cell, CMSO.hasAngle) + lattice_angle = self.kg.graph.value(unit_cell, CMSO.hasAngle) + for triple in self.kg.graph.triples((parent_lattice_angle, None, None)): + self.kg.graph.add((lattice_angle, triple[1], triple[2])) + + + def add_structural_relation(self, ): + self.kg.add((self.sample, RDF.type, PROV.Entity)) + if self.parent_sample is not None: + self.kg.add((self.parent_sample, RDF.type, PROV.Entity)) + self.kg.add((self.sample, PROV.wasDerivedFrom, self.parent_sample)) + self._add_inherited_properties() + self._get_lattice_properties() + + + def add_method(self, ): + """ + mdict + ----- + md: + method: MolecularStatics + temperature: 100 + pressure: 0 + dof: + - AtomicPositions + - CellVolume + ensemble: NPT + id: 2314 + potential: + uri: https://doi.org/xxx + type: eam + label: string + workflow_manager: + uri: xxxx + label: pyiron + software: + - uri: xxxx + label: lammps + - uri: xxxx + label: pyscal + + """ + if self.mdict is None: + return + + if 'md' in self.mdict.keys(): + method_type = 'md' + mdict = self.mdict['md'] + elif 'dft' in self.mdict.keys(): + method_type = 'dft' + mdict = self.mdict['dft'] + else: + raise KeyError('method dict keys should be either md or dft') + + + #add activity + main_id = mdict['id'] + activity = URIRef(f'activity:{main_id}') + self.kg.add((activity, RDF.type, PROV.Activity)) + + #method, this is specific to dft/md + if method_type == 'md': + method = URIRef(f'method:{main_id}') + if mdict['method'] == 'MolecularStatics': + self.kg.add((method, RDF.type, ASO.MolecularStatics)) + elif mdict['method'] == 'MolecularDynamics': + self.kg.add((method, RDF.type, ASO.MolecularDynamics)) + elif method_type == 'dft': + method = URIRef(f'method:{main_id}') + if mdict['method'] == 'DensityFunctionalTheory': + self.kg.add((method, RDF.type, ASO.DensityFunctionalTheory)) + self.kg.add((activity, ASO.hasMethod, method)) + + if len(mdict['dof']) == 0: + self.kg.add((activity, RDF.type, ASO.RigidEnergyCalculation)) + else: + self.kg.add((activity, RDF.type, ASO.StructureOptimization)) + + for dof in mdict['dof']: + self.kg.add((activity, ASO.hasRelaxationDOF, getattr(ASO, dof))) + + if method_type == 'md': + self.kg.add((method, ASO.hasStatisticalEnsemble, getattr(ASO, mdict['ensemble']))) + + #add temperature if needed + if mdict['temperature'] is not None: + temperature = URIRef(f'temperature:{main_id}') + self.kg.add((temperature, RDF.type, ASO.InputParameter)) + self.kg.add((temperature, RDFS.label, Literal('temperature', datatype=XSD.string))) + self.kg.add((activity, ASO.hasInputParameter, temperature)) + self.kg.add((temperature, ASO.hasValue, Literal(mdict['temperature'], datatype=XSD.float))) + self.kg.add((temperature, ASO.hasUnit, URIRef('http://qudt.org/vocab/unit/K'))) + + if mdict['pressure'] is not None: + pressure = URIRef(f'pressure:{main_id}') + self.kg.add((pressure, RDF.type, ASO.InputParameter)) + self.kg.add((pressure, RDFS.label, Literal('pressure', datatype=XSD.string))) + self.kg.add((activity, ASO.hasInputParameter, pressure)) + self.kg.add((pressure, ASO.hasValue, Literal(mdict['pressure'], datatype=XSD.float))) + self.kg.add((pressure, ASO.hasUnit, URIRef('http://qudt.org/vocab/unit/GigaPA'))) + + #potentials need to be mapped + potential = URIRef(f'potential:{main_id}') + if 'meam' in mdict['potential']['type']: + self.kg.add((potential, RDF.type, ASO.MEAM)) + elif 'eam' in mdict['potential']['type']: + self.kg.add((potential, RDF.type, ASO.EAM)) + elif 'lj' in mdict['potential']['type']: + self.kg.add((potential, RDF.type, ASO.LennardJones)) + elif 'ace' in mdict['potential']['type']: + self.kg.add((potential, RDF.type, ASO.MLPotential)) + else: + self.kg.add((potential, RDF.type, ASO.InteratomicPotential)) + + if 'uri' in mdict['potential'].keys(): + self.kg.add((potential, ASO.hasReference, Literal(mdict['potential']['uri']))) + if 'label' in mdict['potential'].keys(): + self.kg.add((potential, RDFS.label, Literal(mdict['potential']['label']))) + + self.kg.add((method, ASO.hasInteratomicPotential, potential)) + + self.kg.add((self.sample, PROV.wasGeneratedBy, activity)) + + #finally add software + wfagent = None + if 'workflow_manager' in mdict.keys(): + wfagent = URIRef(mdict["workflow_manager"]['uri']) + self.kg.add((wfagent, RDF.type, PROV.SoftwareAgent)) + self.kg.add((wfagent, RDFS.label, Literal(mdict["workflow_manager"]['label']))) + self.kg.add((method, PROV.wasAssociatedWith, wfagent)) + + for software in mdict['software']: + agent = URIRef(software['uri']) + self.kg.add((agent, RDF.type, PROV.SoftwareAgent)) + self.kg.add((agent, RDFS.label, Literal(software['label']))) + + if wfagent is not None: + self.kg.add((wfagent, PROV.actedOnBehalfOf, agent)) + else: + self.kg.add((method, PROV.wasAssociatedWith, agent)) + + for key, val in mdict['outputs'].items(): + prop = URIRef(f'{main_id}_{key}') + self.kg.add((prop, RDF.type, CMSO.CalculatedProperty)) + self.kg.add((prop, RDFS.label, Literal(key))) + self.kg.add((prop, ASO.hasValue, Literal(val["value"]))) + if "unit" in val.keys(): + unit = val['unit'] + self.kg.add((prop, ASO.hasUnit, URIRef(f'http://qudt.org/vocab/unit/{unit}'))) + self.kg.add((prop, CMSO.wasCalculatedBy, activity)) + if val['associate_to_sample']: + self.kg.add((self.sample, CMSO.hasCalculatedProperty, prop)) + + def to_graph(self, workflow_object): + self._prepare_job(workflow_object) + self.add_structural_relation() + self.add_method()