From c146a788cf341ea643f9a8df31617fbe18e71bbb Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Thu, 18 Apr 2024 16:22:17 +0200 Subject: [PATCH] format code --- atomrdf/__init__.py | 2 +- atomrdf/encoder.py | 4 +- atomrdf/graph.py | 542 ++++++++------- atomrdf/json_io.py | 11 +- atomrdf/namespace.py | 29 +- atomrdf/network/network.py | 244 ++++--- atomrdf/network/ontology.py | 123 ++-- atomrdf/network/parser.py | 153 ++--- atomrdf/network/patch.py | 28 +- atomrdf/network/term.py | 112 +-- atomrdf/properties.py | 99 ++- atomrdf/stores.py | 62 +- atomrdf/structure.py | 1246 ++++++++++++++++++++++------------ atomrdf/visualize.py | 150 ++-- atomrdf/workflow/__init__.py | 1 - atomrdf/workflow/pyiron.py | 239 ++++--- atomrdf/workflow/workflow.py | 308 ++++++--- 17 files changed, 2037 insertions(+), 1316 deletions(-) diff --git a/atomrdf/__init__.py b/atomrdf/__init__.py index 4ca49b6..b667125 100644 --- a/atomrdf/__init__.py +++ b/atomrdf/__init__.py @@ -1,3 +1,3 @@ from atomrdf.graph import KnowledgeGraph from atomrdf.structure import System -from atomrdf.workflow.workflow import Workflow \ No newline at end of file +from atomrdf.workflow.workflow import Workflow diff --git a/atomrdf/encoder.py b/atomrdf/encoder.py index 2d8f060..3e4aa81 100644 --- a/atomrdf/encoder.py +++ b/atomrdf/encoder.py @@ -1,10 +1,12 @@ import numpy as np from json import JSONEncoder + class NumpyArrayEncoder(JSONEncoder): """ Encode numpy to dump in json """ + def default(self, obj): if isinstance(obj, np.integer): return int(obj) @@ -13,4 +15,4 @@ def default(self, obj): elif isinstance(obj, np.ndarray): return obj.tolist() else: - return super(NumpyArrayEncoder, self).default(obj) \ No newline at end of file + return super(NumpyArrayEncoder, self).default(obj) diff --git a/atomrdf/graph.py b/atomrdf/graph.py index e94a4a6..2c75385 100644 --- a/atomrdf/graph.py +++ b/atomrdf/graph.py @@ -8,7 +8,7 @@ - To ensure domain and range checking works as expected, always add type before adding further properties! """ -from rdflib import Graph, Literal, XSD, RDF, RDFS, BNode, URIRef +from rdflib import Graph, Literal, XSD, RDF, RDFS, BNode, URIRef import os import numpy as np @@ -24,7 +24,7 @@ import logging import warnings -#from pyscal3.core import System +# from pyscal3.core import System from pyscal3.atoms import Atoms from atomrdf.visualize import visualize_graph @@ -37,31 +37,37 @@ from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO -#read element data file -file_location = os.path.dirname(__file__).split('/') +# read element data file +file_location = os.path.dirname(__file__).split("/") file_location = "/".join(file_location[:-1]) -file_location = os.path.join(os.path.dirname(__file__), 'data/element.yml') -with open(file_location, 'r') as fin: +file_location = os.path.join(os.path.dirname(__file__), "data/element.yml") +with open(file_location, "r") as fin: element_indetifiers = yaml.safe_load(fin) defstyledict = { "edgecolor": "#263238", - "BNode": {"color": "#D9D9D9", - "shape": "box", - "style": "filled", - "fontsize": "8", - "fontname": "Helvetica"}, - "URIRef": {"color": "#C9DAF8", - "shape": "box", - "style": "filled", - "fontsize": "8", - "fontname": "Helvetica"}, - "Literal": {"color": "#E6B8AF", - "shape": "parallelogram", - "style": "filled", - "fontsize": "8", - "fontname": "Helvetica"}, + "BNode": { + "color": "#D9D9D9", + "shape": "box", + "style": "filled", + "fontsize": "8", + "fontname": "Helvetica", + }, + "URIRef": { + "color": "#C9DAF8", + "shape": "box", + "style": "filled", + "fontsize": "8", + "fontname": "Helvetica", + }, + "Literal": { + "color": "#E6B8AF", + "shape": "parallelogram", + "style": "filled", + "fontsize": "8", + "fontname": "Helvetica", + }, } @@ -74,9 +80,11 @@ def _replace_keys(refdict, indict): refdict[key] = val return refdict + def _dummy_log(str): pass + def _prepare_log(file): logger = logging.getLogger(__name__) handler = logging.FileHandler(file) @@ -89,34 +97,40 @@ def _prepare_log(file): class KnowledgeGraph: - def __init__(self, graph_file=None, - store="Memory", + def __init__( + self, + graph_file=None, + store="Memory", store_file=None, identifier="http://default_graph", ontology=None, structure_store=None, - enable_log=False): - + enable_log=False, + ): - create_store(self, store, identifier, + create_store( + self, + store, + identifier, store_file=store_file, - structure_store=structure_store) - - #enable logging + structure_store=structure_store, + ) + + # enable logging if enable_log: - logger = _prepare_log(os.path.join(os.getcwd(), 'atomrdf.log')) + logger = _prepare_log(os.path.join(os.getcwd(), "atomrdf.log")) self.log = logger.info else: self.log = _dummy_log - #start binding + # start binding self.graph.bind("cmso", CMSO) self.graph.bind("pldo", PLDO) - + if graph_file is not None: if os.path.exists(graph_file): self.graph.parse(graph_file) - + self.sample = None self.material = None self.sysdict = None @@ -130,7 +144,6 @@ def __init__(self, graph_file=None, self._n_triples = 0 self._initialize_graph() - def add_structure(self, structure): structure.graph = self structure.to_graph() @@ -139,7 +152,7 @@ def _is_valid(self, input_list): valid = False flat_list = [] for x in input_list: - if isinstance(x,list): + if isinstance(x, list): flat_list.extend(x) else: flat_list.append(x) @@ -150,7 +163,7 @@ def _is_valid(self, input_list): return valid def _is_ontoterm(self, term): - return type(term).__name__ == 'OntoTerm' + return type(term).__name__ == "OntoTerm" def _modify_triple(self, triple): modified_triple = [] @@ -165,105 +178,108 @@ def _check_domain_if_uriref(self, triple): found = True dm = self.value(triple[0], RDF.type) if dm is not None: - #we need to check - domain = triple[1].domain + # we need to check + domain = triple[1].domain if len(domain) > 0: - if 'owl:Thing' not in domain: + if "owl:Thing" not in domain: if triple[1].namespace_with_prefix not in dm: - #cross ontology term - self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {dm}') + # cross ontology term + self.log( + f"ignoring possible cross ontology connection between {triple[1].namespace} and {dm}" + ) return True, None found = False for d in domain: - if d.split(':')[-1] in dm: + if d.split(":")[-1] in dm: found = True break return found, dm -# def _check_domain_if_ontoterm(self, triple): -# found = True -# domain = triple[0].domain -# if len(domain) > 0: -# if 'owl:Thing' not in domain: -# if triple[1].namespace != triple[0].namespace: -# #cross ontology term -# self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {triple[0].namespace}') -# return True, None -# found = False -# if triple[1].name in domain: -# found = True -# return found, triple[0].name - - + # def _check_domain_if_ontoterm(self, triple): + # found = True + # domain = triple[0].domain + # if len(domain) > 0: + # if 'owl:Thing' not in domain: + # if triple[1].namespace != triple[0].namespace: + # #cross ontology term + # self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {triple[0].namespace}') + # return True, None + # found = False + # if triple[1].name in domain: + # found = True + # return found, triple[0].name + def _check_domain(self, triple): if self._is_ontoterm(triple[1]): - #check if type was provided + # check if type was provided found = True dm = None - - if type(triple[0]).__name__ == 'URIRef': + + if type(triple[0]).__name__ == "URIRef": found, dm = self._check_domain_if_uriref(triple) - #elif self._is_ontoterm(triple[0]): + # elif self._is_ontoterm(triple[0]): # found, dm = self._check_domain_if_ontoterm(triple) if not found: - raise ValueError(f'{dm} not in domain of {triple[1].name}') - - self.log(f'checked {triple[1].name} against domain {dm}') + raise ValueError(f"{dm} not in domain of {triple[1].name}") + self.log(f"checked {triple[1].name} against domain {dm}") def _check_range_if_uriref(self, triple): found = True rn = self.value(triple[2], RDF.type) if rn is not None: - #we need to check - rang = triple[1].range + # we need to check + rang = triple[1].range if len(rang) > 0: - if 'owl:Thing' not in rang: + if "owl:Thing" not in rang: if triple[1].namespace_with_prefix not in rn: - #cross ontology term - self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {rn}') + # cross ontology term + self.log( + f"ignoring possible cross ontology connection between {triple[1].namespace} and {rn}" + ) return True, None found = False for r in rang: - if r.split(':')[-1] in rn: + if r.split(":")[-1] in rn: found = True break return found, rn -# def _check_range_if_ontoterm(self, triple): -# found = True -# rang = triple[1].range -# if len(rang) > 0: -# if 'owl:Thing' not in rang: -# if triple[1].namespace != triple[2].namespace: -# #cross ontology term -# self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {triple[2].namespace}') -# return True, None -# -# found = False -# if triple[2].name in rang: -# found = True -# return found, triple[2].name - + # def _check_range_if_ontoterm(self, triple): + # found = True + # rang = triple[1].range + # if len(rang) > 0: + # if 'owl:Thing' not in rang: + # if triple[1].namespace != triple[2].namespace: + # #cross ontology term + # self.log(f'ignoring possible cross ontology connection between {triple[1].namespace} and {triple[2].namespace}') + # return True, None + # + # found = False + # if triple[2].name in rang: + # found = True + # return found, triple[2].name def _check_range_if_literal(self, triple): found = True if triple[2].datatype is None: - self.log(f'WARNING: {triple[1].name} has a range with unspecified datatype!') - warnings.warn(f'{triple[1].name} has a range with unspecified datatype!') + self.log( + f"WARNING: {triple[1].name} has a range with unspecified datatype!" + ) + warnings.warn(f"{triple[1].name} has a range with unspecified datatype!") return True, None - destination_range = triple[2].datatype.toPython().split('#')[-1] - - if destination_range == 'string': - destination_range = 'str' - elif destination_range == 'integer': - destination_range = 'int' + destination_range = triple[2].datatype.toPython().split("#")[-1] + + if destination_range == "string": + destination_range = "str" + elif destination_range == "integer": + destination_range = "int" rang = triple[1].range if len(rang) > 0: @@ -274,24 +290,23 @@ def _check_range_if_literal(self, triple): def _check_range(self, triple): if self._is_ontoterm(triple[1]): - #check if type was provided + # check if type was provided found = True dm = None - if type(triple[2]).__name__ == 'URIRef': + if type(triple[2]).__name__ == "URIRef": found, dm = self._check_range_if_uriref(triple) - #elif self._is_ontoterm(triple[2]): + # elif self._is_ontoterm(triple[2]): # found, dm = self._check_range_if_ontoterm(triple) - elif type(triple[2]).__name__ == 'Literal': + elif type(triple[2]).__name__ == "Literal": found, dm = self._check_range_if_literal(triple) if not found: - raise ValueError(f'{dm} not in range of {triple[1].name}') - - self.log(f'checked {triple[1].name} against range {dm}') + raise ValueError(f"{dm} not in range of {triple[1].name}") + self.log(f"checked {triple[1].name} against range {dm}") def add(self, triple, validate=True): """ @@ -300,25 +315,24 @@ def add(self, triple, validate=True): """ modified_triple = self._modify_triple(triple) - self.log(f'attempting to add triple: {self._n_triples}') - self.log(f'- {modified_triple[0].toPython()}') - self.log(f'- {modified_triple[1].toPython()}') - self.log(f'- {modified_triple[2].toPython()}') + self.log(f"attempting to add triple: {self._n_triples}") + self.log(f"- {modified_triple[0].toPython()}") + self.log(f"- {modified_triple[1].toPython()}") + self.log(f"- {modified_triple[2].toPython()}") if validate: self._check_domain(triple) self._check_range(triple) - if str(modified_triple[2].toPython()) == 'None': - self.log(f'rejecting None valued triple') + if str(modified_triple[2].toPython()) == "None": + self.log(f"rejecting None valued triple") return self.graph.add(modified_triple) self._n_triples += 1 - self.log('added') + self.log("added") - def triples(self, triple): modified_triple = self._modify_triple(triple) return self.graph.triples(modified_triple) @@ -331,7 +345,6 @@ def remove(self, triple): modified_triple = self._modify_triple(triple) return self.graph.remove(modified_triple) - def create_node(self, namestring, classtype): item = URIRef(namestring) self.add((item, RDF.type, classtype)) @@ -347,26 +360,29 @@ def _initialize_graph(self): if True, alphanumeric names will be used instead of random BNodes name_index: string - Prefix to be added to identifiers, default 01 - + Prefix to be added to identifiers, default 01 + Returns ------- None - """ - #extra triples + """ + # extra triples self.add((CMSO.SimulationCellLength, RDFS.subClassOf, CMSO.Length)) self.add((CMSO.LatticeParameter, RDFS.subClassOf, CMSO.Length)) - self.add((CMSO.Length, CMSO.hasUnit, URIRef("http://qudt.org/vocab/unit/ANGSTROM"))) - + self.add( + (CMSO.Length, CMSO.hasUnit, URIRef("http://qudt.org/vocab/unit/ANGSTROM")) + ) + self.add((CMSO.SimulationCellAngle, RDFS.subClassOf, CMSO.Angle)) self.add((CMSO.LatticeAngle, RDFS.subClassOf, CMSO.Angle)) self.add((CMSO.Angle, CMSO.hasUnit, URIRef("http://qudt.org/vocab/unit/DEG"))) - + self.add((CMSO.LatticeVector, RDFS.subClassOf, CMSO.Vector)) self.add((CMSO.SimulationCellVector, RDFS.subClassOf, CMSO.Vector)) - #self.add((CMSO.PositionVector, RDFS.subClassOf, CMSO.Vector)) - self.add((CMSO.Vector, CMSO.hasUnit, URIRef("http://qudt.org/vocab/unit/ANGSTROM"))) - + # self.add((CMSO.PositionVector, RDFS.subClassOf, CMSO.Vector)) + self.add( + (CMSO.Vector, CMSO.hasUnit, URIRef("http://qudt.org/vocab/unit/ANGSTROM")) + ) def add_calculated_quantity(self, sample, propertyname, value, unit=None): prop = self.create_node(propertyname, CMSO.CalculatedProperty) @@ -374,38 +390,48 @@ def add_calculated_quantity(self, sample, propertyname, value, unit=None): self.add((prop, RDFS.label, Literal(propertyname))) self.add((prop, CMSO.hasValue, Literal(value))) if unit is not None: - self.add((prop, CMSO.hasUnit, URIRef(f'http://qudt.org/vocab/unit/{unit}'))) - + self.add((prop, CMSO.hasUnit, URIRef(f"http://qudt.org/vocab/unit/{unit}"))) def inspect_sample(self, sample): natoms = self.value(sample, CMSO.hasNumberOfAtoms).toPython() - material = list([k[2] for k in self.triples((sample, CMSO.hasMaterial, None))])[0] + material = list([k[2] for k in self.triples((sample, CMSO.hasMaterial, None))])[ + 0 + ] defects = list([k[2] for k in self.triples((material, CMSO.hasDefect, None))]) - composition = list([k[2].toPython() for k in self.triples((material, CMSO.hasElementRatio, None))]) + composition = list( + [ + k[2].toPython() + for k in self.triples((material, CMSO.hasElementRatio, None)) + ] + ) crystalstructure = self.value(material, CMSO.hasStructure) spacegroup = self.value(crystalstructure, CMSO.hasSpaceGroup) spacegroupsymbol = self.value(spacegroup, CMSO.hasSpaceGroupSymbol).toPython() lattice = self.value(sample, CMSO.hasNumberOfAtoms).toPython() defect_types = list([self.value(d, RDF.type).toPython() for d in defects]) - prop_nodes = list([k[2] for k in self.triples((sample, CMSO.hasCalculatedProperty, None))]) + prop_nodes = list( + [k[2] for k in self.triples((sample, CMSO.hasCalculatedProperty, None))] + ) props = list([self.value(prop_node, RDFS.label) for prop_node in prop_nodes]) propvals = list([self.value(d, CMSO.hasValue).toPython() for d in prop_nodes]) units = list([self.value(d, CMSO.hasUnit).toPython() for d in prop_nodes]) st = [] - st.append(f'Sample with {natoms} atoms.\n') + st.append(f"Sample with {natoms} atoms.\n") st.append("Material:\n") st.append(" ".join(composition)) st.append("\n") - st.append(f'Space Group symbol: {spacegroupsymbol}\n') + st.append(f"Space Group symbol: {spacegroupsymbol}\n") if len(defect_types) > 0: - st.append('With defects:\n') + st.append("With defects:\n") for d in defect_types: - st.append(f'{d}\n') + st.append(f"{d}\n") if len(props) > 0: - st.append('With calculated properties:\n') + st.append("With calculated properties:\n") for x in range(len(props)): - st.append(f'{props[x]} with value: {propvals[x]} and unit: {units[x]}\n') + st.append( + f"{props[x]} with value: {propvals[x]} and unit: {units[x]}\n" + ) return " ".join(st) @@ -416,7 +442,7 @@ def visualize(self, *args, **kwargs): Parameters ---------- backend: string, {'ipycytoscape', 'graphviz'} - Chooses the backend with which the graph will be plotted. ipycytoscape provides an interactive, + Chooses the backend with which the graph will be plotted. ipycytoscape provides an interactive, but slow visualisation, whereas graphviz provides a non-interactive fast visualisation. edge_color: string @@ -438,28 +464,30 @@ def visualize(self, *args, **kwargs): ----- styledict has the following options. Refer to graphviz and ipycytoscape documentation for more details - BNode: - color: - shape: - style: - URIRef: - color: - shape: - style: - Literal: - color: - shape: - style: + BNode: + color: + shape: + style: + URIRef: + color: + shape: + style: + Literal: + color: + shape: + style: """ self.visualise(*args, **kwargs) - - def visualise(self, - styledict=None, - rankdir='BT', - hide_types=False, - workflow_view=False, - size=None, - layout='neato'): + + def visualise( + self, + styledict=None, + rankdir="BT", + hide_types=False, + workflow_view=False, + size=None, + layout="neato", + ): """ Vosualise the RDF tree of the Graph @@ -484,18 +512,18 @@ def visualise(self, ----- styledict has the following options. Refer to graphviz and ipycytoscape documentation for more details - BNode: - color: - shape: - style: - URIRef: - color: - shape: - style: - Literal: - color: - shape: - style: + BNode: + color: + shape: + style: + URIRef: + color: + shape: + style: + Literal: + color: + shape: + style: """ if size is not None: size = f"{size[0]},{size[1]}" @@ -503,16 +531,17 @@ def visualise(self, sdict = defstyledict.copy() if styledict is not None: sdict = _replace_keys(sdict, styledict) - - return visualize_graph(self.graph, - styledict=sdict, - rankdir=rankdir, - hide_types=hide_types, - workflow_view=workflow_view, - size=size, - layout=layout) - - + + return visualize_graph( + self.graph, + styledict=sdict, + rankdir=rankdir, + hide_types=hide_types, + workflow_view=workflow_view, + size=size, + layout=layout, + ) + def write(self, filename, format="json-ld"): """ Write the serialised version of the graph to a file @@ -523,7 +552,7 @@ def write(self, filename, format="json-ld"): name of output file format: string, {'turtle', 'xml', 'json-ld', 'ntriples', 'n3'} - output format to be written to + output format to be written to Returns ------- @@ -532,63 +561,75 @@ def write(self, filename, format="json-ld"): with open(filename, "w") as fout: fout.write(self.graph.serialize(format=format)) - - def archive(self, package_name, format='turtle', compress=True): + + def archive(self, package_name, format="turtle", compress=True): """ Publish a dataset from graph including per atom quantities """ - #first step make a folder + # first step make a folder if os.path.exists(package_name): - raise ValueError(f'{package_name} already exists') + raise ValueError(f"{package_name} already exists") if compress: - if os.path.exists(f'{package_name}.tar.gz'): - raise ValueError(f'{package_name} tarball already exists') - + if os.path.exists(f"{package_name}.tar.gz"): + raise ValueError(f"{package_name} tarball already exists") + os.mkdir(package_name) - structure_store = f'{package_name}/rdf_structure_store' + structure_store = f"{package_name}/rdf_structure_store" os.mkdir(structure_store) - #now go through each sample, and copy the file, at the same time fix the paths + # now go through each sample, and copy the file, at the same time fix the paths for sample in self.samples: - filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython() + filepath = self.value(URIRef(f"{sample}_Position"), CMSO.hasPath).toPython() shutil.copy(filepath, structure_store) - - #now we have to remove the old path, and fix new - for val in ['Position', 'Species']: - self.remove((URIRef(f'{sample}_{val}'), CMSO.hasPath, None)) - - #assign corrected path - new_relpath = "/".join(['rdf_structure_store', filepath.split('/')[-1]]) - self.add((URIRef(f'{sample}_{val}'), CMSO.hasPath, Literal(new_relpath, datatype=XSD.string))) - - triple_file = os.path.join(package_name, 'triples') + + # now we have to remove the old path, and fix new + for val in ["Position", "Species"]: + self.remove((URIRef(f"{sample}_{val}"), CMSO.hasPath, None)) + + # assign corrected path + new_relpath = "/".join(["rdf_structure_store", filepath.split("/")[-1]]) + self.add( + ( + URIRef(f"{sample}_{val}"), + CMSO.hasPath, + Literal(new_relpath, datatype=XSD.string), + ) + ) + + triple_file = os.path.join(package_name, "triples") self.write(triple_file, format=format) if compress: - with tarfile.open(f'{package_name}.tar.gz', "w:gz") as tar: + with tarfile.open(f"{package_name}.tar.gz", "w:gz") as tar: tar.add(package_name, arcname=os.path.basename(package_name)) shutil.rmtree(package_name) - @classmethod - def unarchive(cls, package_name, compress=True, - store="Memory", + def unarchive( + cls, + package_name, + compress=True, + store="Memory", store_file=None, identifier="http://default_graph", - ontology=None): + ontology=None, + ): if compress: package_base_name = ".".join(package_name.split(".")[:-2]) - with tarfile.open(package_name) as fin: + with tarfile.open(package_name) as fin: fin.extractall(".") - #os.remove(package_name) - #copy things out - - return cls(store=store, store_file=store_file, - identifier=identifier, - graph_file=f'{package_base_name}/triples', - structure_store=f'{package_base_name}/rdf_structure_store', - ontology=ontology) - + # os.remove(package_name) + # copy things out + + return cls( + store=store, + store_file=store_file, + identifier=identifier, + graph_file=f"{package_base_name}/triples", + structure_store=f"{package_base_name}/rdf_structure_store", + ontology=ontology, + ) + def query(self, inquery): """ Query the graph using SPARQL @@ -605,42 +646,55 @@ def query(self, inquery): """ res = self.graph.query(inquery) if res is not None: - for line in inquery.split('\n'): - if 'SELECT DISTINCT' in line: + for line in inquery.split("\n"): + if "SELECT DISTINCT" in line: break labels = [x[1:] for x in line.split()[2:]] return pd.DataFrame(res, columns=labels) raise ValueError("SPARQL query returned None") - def auto_query(self, source, destination, - condition=None, - return_query=False, - enforce_types=None): + def auto_query( + self, + source, + destination, + condition=None, + return_query=False, + enforce_types=None, + ): if enforce_types is None: for val in [True, False]: - query = self.ontology.create_query(source, destination, - condition=condition, enforce_types=val) + query = self.ontology.create_query( + source, destination, condition=condition, enforce_types=val + ) if return_query: return query res = self.query(query) if len(res) != 0: return res else: - query = self.ontology.create_query(source, destination, - condition=condition, enforce_types=enforce_types) + query = self.ontology.create_query( + source, destination, condition=condition, enforce_types=enforce_types + ) if return_query: return query res = self.query(query) - return res + return res ################################# # Methods to interact with sample ################################# - def query_sample(self, destination, condition=None, return_query=False, enforce_types=None): - return self.auto_query(self.ontology.terms.cmso.AtomicScaleSample, destination, - condition=condition, return_query=return_query, enforce_types=enforce_types) + def query_sample( + self, destination, condition=None, return_query=False, enforce_types=None + ): + return self.auto_query( + self.ontology.terms.cmso.AtomicScaleSample, + destination, + condition=condition, + return_query=return_query, + enforce_types=enforce_types, + ) @property def n_samples(self): @@ -649,7 +703,7 @@ def n_samples(self): """ return len([x for x in self.triples((None, RDF.type, CMSO.AtomicScaleSample))]) - + @property def samples(self): """ @@ -657,7 +711,7 @@ def samples(self): """ return [x[0] for x in self.triples((None, RDF.type, CMSO.AtomicScaleSample))] - + def iterate_graph(self, item, create_new_graph=False): if create_new_graph: self.sgraph = KnowledgeGraph() @@ -665,7 +719,7 @@ def iterate_graph(self, item, create_new_graph=False): for triple in triples: self.sgraph.graph.add(triple) self.iterate_graph(triple[2]) - + def get_sample(self, sample, no_atoms=False): """ Get the Sample as an RDFGraph @@ -690,7 +744,7 @@ def get_sample(self, sample, no_atoms=False): na = self.sgraph.value(sample, CMSO.hasNumberOfAtoms).toPython() return self.sgraph, na return self.sgraph - + def get_system_from_sample(self, sample): """ Get a pyscal :py:class:`pyscal.core.System` from the selected sample @@ -713,24 +767,28 @@ def get_system_from_sample(self, sample): cell_vectors[0].append(self.value(s[2], CMSO.hasComponent_x).toPython()) cell_vectors[1].append(self.value(s[2], CMSO.hasComponent_y).toPython()) cell_vectors[2].append(self.value(s[2], CMSO.hasComponent_z).toPython()) - - #cell_vectors - filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython() - position_identifier = self.value(URIRef(f'{sample}_Position'), CMSO.hasIdentifier).toPython() - species_identifier = self.value(URIRef(f'{sample}_Species'), CMSO.hasIdentifier).toPython() - - #open the file for reading - with open(filepath, 'r') as fin: + + # cell_vectors + filepath = self.value(URIRef(f"{sample}_Position"), CMSO.hasPath).toPython() + position_identifier = self.value( + URIRef(f"{sample}_Position"), CMSO.hasIdentifier + ).toPython() + species_identifier = self.value( + URIRef(f"{sample}_Species"), CMSO.hasIdentifier + ).toPython() + + # open the file for reading + with open(filepath, "r") as fin: data = json.load(fin) - positions = data[position_identifier]['value'] - species = data[species_identifier]['value'] + positions = data[position_identifier]["value"] + species = data[species_identifier]["value"] atoms = {"positions": positions, "species": species} at = Atoms() at.from_dict(atoms) sys = System() sys.box = cell_vectors - sys.atoms = at + sys.atoms = at return sys def to_file(self, sample, filename=None, format="poscar"): @@ -754,12 +812,12 @@ def to_file(self, sample, filename=None, format="poscar"): if filename is None: filename = os.path.join(os.getcwd(), "out") - + sys = self.get_system_from_sample(sample) - - if format=="ase": + + if format == "ase": return sys.write.ase() - elif format=='poscar': + elif format == "poscar": asesys = sys.write.ase() write(filename, asesys, format="vasp") else: diff --git a/atomrdf/json_io.py b/atomrdf/json_io.py index ed3bdae..7cb5b0b 100644 --- a/atomrdf/json_io.py +++ b/atomrdf/json_io.py @@ -2,23 +2,24 @@ import yaml from atomrdf.encoder import NumpyArrayEncoder + def write_file(outfile, data): """ Write a given dict as json file - + Parameters ---------- outfile: string name of output file. `.json` will be added to the given file name - + data: dict input data dict - + Returns ------- None """ with open(".".join([outfile, "json"]), "w") as fout: json.dump(data, fout, cls=NumpyArrayEncoder) - #with open(".".join([outfile, "yaml"]), "w") as fout: - # yaml.unsafe_dump(convert_to_dict(sys), fout) \ No newline at end of file + # with open(".".join([outfile, "yaml"]), "w") as fout: + # yaml.unsafe_dump(convert_to_dict(sys), fout) diff --git a/atomrdf/namespace.py b/atomrdf/namespace.py index 1af9c91..dc32c32 100644 --- a/atomrdf/namespace.py +++ b/atomrdf/namespace.py @@ -5,31 +5,32 @@ from atomrdf.network.network import OntologyNetwork + class Namespace(AttrSetter, RDFLibNamespace): - def __init__(self, infile, delimiter='/'): + def __init__(self, infile, delimiter="/"): AttrSetter.__init__(self) self.network = OntologyNetwork(infile, delimiter=delimiter) - #print(type(self.network.onto.tree.base_iri)) - #self.namespace = RDFLibNamespace(self.network.onto.tree.base_iri) + # print(type(self.network.onto.tree.base_iri)) + # self.namespace = RDFLibNamespace(self.network.onto.tree.base_iri) RDFLibNamespace.__init__(self.network.onto.tree.base_iri) - #self.namespace = RDFLibNamespace("http://purls.helmholtz-metadaten.de/cmso/") + # self.namespace = RDFLibNamespace("http://purls.helmholtz-metadaten.de/cmso/") self.name = self.network.onto.tree.name mapdict = {} - - #now iterate over all attributes - for k1 in ['class', 'object_property', 'data_property']: + + # now iterate over all attributes + for k1 in ["class", "object_property", "data_property"]: for k2, val in self.network.onto.attributes[k1].items(): if val.namespace == self.name: mapdict[val.name_without_prefix] = val - - #add attributes + + # add attributes self._add_attribute(mapdict) file_location = os.path.dirname(__file__) -CMSO = Namespace(os.path.join(file_location, 'data/cmso.owl')) -PLDO = Namespace(os.path.join(file_location, 'data/pldo.owl')) -PODO = Namespace(os.path.join(file_location, 'data/podo.owl')) -ASMO = Namespace(os.path.join(file_location, 'data/asmo.owl')) -PROV = RDFLibNamespace("http://www.w3.org/ns/prov#") \ No newline at end of file +CMSO = Namespace(os.path.join(file_location, "data/cmso.owl")) +PLDO = Namespace(os.path.join(file_location, "data/pldo.owl")) +PODO = Namespace(os.path.join(file_location, "data/podo.owl")) +ASMO = Namespace(os.path.join(file_location, "data/asmo.owl")) +PROV = RDFLibNamespace("http://www.w3.org/ns/prov#") diff --git a/atomrdf/network/network.py b/atomrdf/network/network.py index ad164a6..0aae253 100644 --- a/atomrdf/network/network.py +++ b/atomrdf/network/network.py @@ -11,54 +11,56 @@ owlfile = os.path.join(os.path.dirname(__file__), "../data/cmso.owl") + def _replace_name(name): - return ".".join(name.split(':')) + return ".".join(name.split(":")) + class OntologyNetwork: """ Network representation of Onto """ - def __init__(self, infile=None, delimiter='/'): + + def __init__(self, infile=None, delimiter="/"): if infile is None: infile = owlfile - + self.g = nx.DiGraph() self.onto = OntoParser(infile, delimiter=delimiter) - self.onto.attributes['data_node'] = [] - self.data_prefix = 'value' + self.onto.attributes["data_node"] = [] + self.data_prefix = "value" self.terms = AttrSetter() - self._parse_all() - + self._parse_all() + def _assign_attributes(self): mapdict = {} - #add first level - namespaces + # add first level - namespaces for key in self.namespaces.keys(): mapdict[key] = {} - - #now iterate over all attributes - for k1 in ['class', 'object_property', 'data_property']: + + # now iterate over all attributes + for k1 in ["class", "object_property", "data_property"]: for k2, val in self.onto.attributes[k1].items(): mapdict[val.namespace][val.name_without_prefix] = val - - self.terms._add_attribute(mapdict) + self.terms._add_attribute(mapdict) def _parse_all(self): - #call methods + # call methods self._add_class_nodes() self._add_object_properties() self._add_data_properties() self._assign_attributes() def __add__(self, ontonetwork): - #add onto network + # add onto network self.onto = self.onto + ontonetwork.onto - #now parse again + # now parse again self._parse_all() return self def strip_name(self, name): - raw = name.split(':') + raw = name.split(":") if len(raw) > 1: return raw[-1] return name @@ -82,37 +84,35 @@ def get_shortest_path(self, source, target, triples=False): path = nx.shortest_path(self.g, source=source, target=target) if triples: triple_list = [] - for x in range(len(path)//2): - triple_list.append(path[2*x:2*x+3]) + for x in range(len(path) // 2): + triple_list.append(path[2 * x : 2 * x + 3]) return triple_list return path - + def _add_class_nodes(self): - for key, val in self.onto.attributes['class'].items(): - self.g.add_node(val.name, node_type='class') - + for key, val in self.onto.attributes["class"].items(): + self.g.add_node(val.name, node_type="class") + def _add_object_properties(self): - for key, val in self.onto.attributes['object_property'].items(): - self.g.add_node(val.name, node_type='object_property') - #find domain + for key, val in self.onto.attributes["object_property"].items(): + self.g.add_node(val.name, node_type="object_property") + # find domain for d in val.domain: self.g.add_edge(d, val.name) for r in val.range: self.g.add_edge(val.name, r) - def _add_data_properties(self): - for key, val in self.onto.attributes['data_property'].items(): - self.g.add_node(val.name, node_type='data_property') + for key, val in self.onto.attributes["data_property"].items(): + self.g.add_node(val.name, node_type="data_property") for d in val.domain: self.g.add_edge(d, val.name) for r in val.range: - data_node = f'{val.name}{self.data_prefix}' - self.onto.attributes['data_node'].append(data_node) - self.g.add_node(data_node, node_type='literal', data_type=r) + data_node = f"{val.name}{self.data_prefix}" + self.onto.attributes["data_node"].append(data_node) + self.g.add_node(data_node, node_type="literal", data_type=r) self.g.add_edge(val.name, data_node) - def add_namespace(self, namespace_name, namespace_iri): """ Add a new namespace @@ -122,18 +122,32 @@ def add_namespace(self, namespace_name, namespace_iri): else: raise KeyError("namespace is already there!") - def add_term(self, uri, node_type, namespace=None, - dm=[], rn=[], data_type=None, - node_id=None, delimiter='/'): + def add_term( + self, + uri, + node_type, + namespace=None, + dm=[], + rn=[], + data_type=None, + node_id=None, + delimiter="/", + ): """ Add a node """ - #namespace = strip_name(uri, delimiter, get_what="namespace") - #name = strip_name(uri, delimiter, get_what="name") - term = OntoTerm(uri, namespace=namespace, - node_type=node_type, dm =dm, - rn=rn, data_type=data_type, node_id=node_id, - delimiter=delimiter) + # namespace = strip_name(uri, delimiter, get_what="namespace") + # name = strip_name(uri, delimiter, get_what="name") + term = OntoTerm( + uri, + namespace=namespace, + node_type=node_type, + dm=dm, + rn=rn, + data_type=data_type, + node_id=node_id, + delimiter=delimiter, + ) if not term.namespace in self.onto.namespaces.keys(): raise ValueError("Namespace not found, first add namespace") self.onto.attributes[node_type][term.name] = term @@ -151,121 +165,135 @@ def add_path(self, triple): pred = triple[1] obj = triple[2] - if sub not in self.onto.attributes['class'].keys(): - raise ValueError(f'{sub} not found in self.attributes') + if sub not in self.onto.attributes["class"].keys(): + raise ValueError(f"{sub} not found in self.attributes") - #now add + # now add subclasses = self.onto._get_subclasses(sub) for subclass in subclasses: - self.g.add_edge(subclass, pred) - - #now add pred - if pred in self.onto.attributes['object_property'].keys(): - if obj not in self.onto.attributes['class'].keys(): - raise ValueError(f'{obj} not found in self.attributes') + self.g.add_edge(subclass, pred) + + # now add pred + if pred in self.onto.attributes["object_property"].keys(): + if obj not in self.onto.attributes["class"].keys(): + raise ValueError(f"{obj} not found in self.attributes") subclasses = self.onto._get_subclasses(obj) for subclass in subclasses: - self.g.add_edge(pred, subclass) + self.g.add_edge(pred, subclass) - #another possibility it is data property - elif pred in self.onto.attributes['data_property'].keys(): - data_node = f'{pred}{self.data_prefix}' - self.g.add_node(data_node, node_type='literal', data_type=obj) + # another possibility it is data property + elif pred in self.onto.attributes["data_property"].keys(): + data_node = f"{pred}{self.data_prefix}" + self.g.add_node(data_node, node_type="literal", data_type=obj) self.g.add_edge(pred, data_node) - + else: - raise ValueError(f'{pred} not found in self.attributes') + raise ValueError(f"{pred} not found in self.attributes") - def draw(self, styledict = {"class": {"shape":"box"}, - "object_property": {"shape":"ellipse"}, - "data_property": {"shape":"ellipse"}, - "literal": {"shape":"parallelogram"},}): + def draw( + self, + styledict={ + "class": {"shape": "box"}, + "object_property": {"shape": "ellipse"}, + "data_property": {"shape": "ellipse"}, + "literal": {"shape": "parallelogram"}, + }, + ): dot = graphviz.Digraph() - node_list = list(self.g.nodes(data='node_type')) + node_list = list(self.g.nodes(data="node_type")) edge_list = list(self.g.edges) for node in node_list: name = _replace_name(node[0]) if node[1] is not None: t = node[1] - dot.node(name, shape=styledict[t]['shape'], fontsize="6") + dot.node(name, shape=styledict[t]["shape"], fontsize="6") for edge in edge_list: dot.edge(_replace_name(edge[0]), _replace_name(edge[1])) return dot def get_path_from_sample(self, target): - path = self.get_shortest_path(source="cmso:ComputationalSample", target=target, triples=True) + path = self.get_shortest_path( + source="cmso:ComputationalSample", target=target, triples=True + ) return path - - + def create_query(self, source, destinations, condition=None, enforce_types=True): """ values is a dict with keys value, operation """ if not isinstance(destinations, list): destinations = [destinations] - + source_name = source.query_name destination_names = [destination.query_name for destination in destinations] - - #if condition is specified, and is not there, add it + + # if condition is specified, and is not there, add it if condition is not None: if condition.query_name not in destination_names: destination_names.append(condition.query_name) - - #add source if not available + + # add source if not available if source_name not in destination_names: destination_names.append(source_name) - #start prefix of query + # start prefix of query query = [] for key, val in self.namespaces.items(): - query.append(f'PREFIX {key}: <{val}>') + query.append(f"PREFIX {key}: <{val}>") for key, val in self.extra_namespaces.items(): - query.append(f'PREFIX {key}: <{val}>') - - #now for each destination, start adding the paths in the query + query.append(f"PREFIX {key}: <{val}>") + + # now for each destination, start adding the paths in the query all_triplets = {} for destination in destination_names: triplets = self.get_shortest_path(source_name, destination, triples=True) all_triplets[destination] = triplets - - select_destinations = [f'?{self.strip_name(destination)}' for destination in destination_names] + + select_destinations = [ + f"?{self.strip_name(destination)}" for destination in destination_names + ] query.append(f'SELECT DISTINCT {" ".join(select_destinations)}') query.append("WHERE {") - - #now add corresponding triples + + # now add corresponding triples for destination in destination_names: for triple in all_triplets[destination]: - #print(triple) - query.append(" ?%s %s ?%s ."%(self.strip_name(triple[0]), - triple[1], - self.strip_name(triple[2]))) - - #we enforce types of the source and destination + # print(triple) + query.append( + " ?%s %s ?%s ." + % ( + self.strip_name(triple[0]), + triple[1], + self.strip_name(triple[2]), + ) + ) + + # we enforce types of the source and destination if enforce_types: - if source.node_type == 'class': - query.append(" ?%s rdf:type %s ."%(self.strip_name(source.query_name), source.query_name)) + if source.node_type == "class": + query.append( + " ?%s rdf:type %s ." + % (self.strip_name(source.query_name), source.query_name) + ) for destination in destinations: - if destination.node_type == 'class': - query.append(" ?%s rdf:type %s ."%(self.strip_name(destination.query_name), destination.query_name)) - #now we have to add filters - #filters are only needed if it is a dataproperty - filter_text = '' - - #make filters; get all the unique filters from all the classes in destinations + if destination.node_type == "class": + query.append( + " ?%s rdf:type %s ." + % ( + self.strip_name(destination.query_name), + destination.query_name, + ) + ) + # now we have to add filters + # filters are only needed if it is a dataproperty + filter_text = "" + + # make filters; get all the unique filters from all the classes in destinations if condition is not None: if condition._condition is not None: filter_text = condition._condition - if filter_text != '': - query.append(f'FILTER {filter_text}') - query.append('}') - return '\n'.join(query) - - - - - - - - \ No newline at end of file + if filter_text != "": + query.append(f"FILTER {filter_text}") + query.append("}") + return "\n".join(query) diff --git a/atomrdf/network/ontology.py b/atomrdf/network/ontology.py index be9516b..2944c4c 100644 --- a/atomrdf/network/ontology.py +++ b/atomrdf/network/ontology.py @@ -1,51 +1,80 @@ import os from atomrdf.network.network import OntologyNetwork + def read_ontology(): - #read in ontologies - file_location = os.path.dirname(__file__).split('/') - file_location = "/".join(file_location[:-1]) - - cmso = OntologyNetwork(os.path.join(file_location, 'data/cmso.owl')) - pldo = OntologyNetwork(os.path.join(file_location, 'data/pldo.owl')) - podo = OntologyNetwork(os.path.join(file_location, 'data/podo.owl')) - asmo = OntologyNetwork(os.path.join(file_location, 'data/asmo.owl')) - - #combine them - combo = cmso + pldo + podo + asmo - - #add namespaces - combo.add_namespace('prov', 'http://www.w3.org/ns/prov#') - combo.add_namespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') - combo.add_namespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#') - - #add extra terms for quering - combo.add_term('http://www.w3.org/ns/prov#Entity', 'class', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#Activity', 'class', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#SoftwareAgent', 'class', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#wasDerivedFrom', 'object_property', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#wasGeneratedBy', 'object_property', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#wasAssociatedWith', 'object_property', delimiter='#') - combo.add_term('http://www.w3.org/ns/prov#actedOnBehalfOf', 'object_property', delimiter='#') - combo.add_term('http://www.w3.org/2000/01/rdf-schema#label', 'data_property', delimiter='#', namespace='rdfs') - combo.add_term('http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'object_property', delimiter='#', namespace='rdf') - - #add paths - - #General fixes - combo.add_path(('cmso:CrystalStructure', 'cmso:hasAltName', 'string')) - combo.add_path(('cmso:ChemicalElement', 'cmso:hasSymbol', 'string')) - - #interontology paths - combo.add_path(('cmso:Material', 'cmso:hasDefect', 'pldo:PlanarDefect')) - combo.add_path(('cmso:Material', 'cmso:hasDefect', 'podo:Vacancy')) - combo.add_path(('cmso:SimulationCell', 'podo:hasVacancyConcentration', 'float')) - combo.add_path(('cmso:SimulationCell', 'podo:hasNumberOfVacancies', 'int')) - combo.add_path(('cmso:ComputationalSample', 'prov:wasDerivedFrom', 'cmso:ComputationalSample')) - combo.add_path(('cmso:ComputationalSample', 'rdf:type', 'prov:Entity')) - combo.add_path(('asmo:StructureOptimization', 'rdf:type', 'prov:Activity')) - combo.add_path(('asmo:StructureOptimization', 'prov:wasAssociatedWith', 'prov:SoftwareAgent')) - combo.add_path(('cmso:ComputationalSample', 'prov:wasGeneratedBy', 'asmo:StructureOptimization')) - - #return - return combo \ No newline at end of file + # read in ontologies + file_location = os.path.dirname(__file__).split("/") + file_location = "/".join(file_location[:-1]) + + cmso = OntologyNetwork(os.path.join(file_location, "data/cmso.owl")) + pldo = OntologyNetwork(os.path.join(file_location, "data/pldo.owl")) + podo = OntologyNetwork(os.path.join(file_location, "data/podo.owl")) + asmo = OntologyNetwork(os.path.join(file_location, "data/asmo.owl")) + + # combine them + combo = cmso + pldo + podo + asmo + + # add namespaces + combo.add_namespace("prov", "http://www.w3.org/ns/prov#") + combo.add_namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#") + combo.add_namespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#") + + # add extra terms for quering + combo.add_term("http://www.w3.org/ns/prov#Entity", "class", delimiter="#") + combo.add_term("http://www.w3.org/ns/prov#Activity", "class", delimiter="#") + combo.add_term("http://www.w3.org/ns/prov#SoftwareAgent", "class", delimiter="#") + combo.add_term( + "http://www.w3.org/ns/prov#wasDerivedFrom", "object_property", delimiter="#" + ) + combo.add_term( + "http://www.w3.org/ns/prov#wasGeneratedBy", "object_property", delimiter="#" + ) + combo.add_term( + "http://www.w3.org/ns/prov#wasAssociatedWith", "object_property", delimiter="#" + ) + combo.add_term( + "http://www.w3.org/ns/prov#actedOnBehalfOf", "object_property", delimiter="#" + ) + combo.add_term( + "http://www.w3.org/2000/01/rdf-schema#label", + "data_property", + delimiter="#", + namespace="rdfs", + ) + combo.add_term( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + "object_property", + delimiter="#", + namespace="rdf", + ) + + # add paths + + # General fixes + combo.add_path(("cmso:CrystalStructure", "cmso:hasAltName", "string")) + combo.add_path(("cmso:ChemicalElement", "cmso:hasSymbol", "string")) + + # interontology paths + combo.add_path(("cmso:Material", "cmso:hasDefect", "pldo:PlanarDefect")) + combo.add_path(("cmso:Material", "cmso:hasDefect", "podo:Vacancy")) + combo.add_path(("cmso:SimulationCell", "podo:hasVacancyConcentration", "float")) + combo.add_path(("cmso:SimulationCell", "podo:hasNumberOfVacancies", "int")) + combo.add_path( + ("cmso:ComputationalSample", "prov:wasDerivedFrom", "cmso:ComputationalSample") + ) + combo.add_path(("cmso:ComputationalSample", "rdf:type", "prov:Entity")) + combo.add_path(("asmo:StructureOptimization", "rdf:type", "prov:Activity")) + combo.add_path( + ("asmo:StructureOptimization", "prov:wasAssociatedWith", "prov:SoftwareAgent") + ) + combo.add_path( + ( + "cmso:ComputationalSample", + "prov:wasGeneratedBy", + "asmo:StructureOptimization", + ) + ) + + # return + return combo diff --git a/atomrdf/network/parser.py b/atomrdf/network/parser.py index 543e317..5e815db 100644 --- a/atomrdf/network/parser.py +++ b/atomrdf/network/parser.py @@ -1,4 +1,3 @@ - from owlready2 import get_ontology import owlready2 @@ -7,28 +6,28 @@ import numpy as np import itertools -from atomrdf.network.term import OntoTerm, strip_name +from atomrdf.network.term import OntoTerm, strip_name from atomrdf.network.patch import patch_terms class OntoParser: - def __init__(self, infile, delimiter='/'): + def __init__(self, infile, delimiter="/"): if os.path.exists(infile): - self.tree = get_ontology(f'file://{infile}').load() - elif infile[:4] == 'http': + self.tree = get_ontology(f"file://{infile}").load() + elif infile[:4] == "http": self.tree = get_ontology(infile) else: - raise FileNotFoundError(f'file {infile} not found!') + raise FileNotFoundError(f"file {infile} not found!") self.attributes = {} - self.attributes['class'] = {} - self.attributes['object_property'] = {} - self.attributes['data_property'] = {} + self.attributes["class"] = {} + self.attributes["object_property"] = {} + self.attributes["data_property"] = {} self.delimiter = delimiter self.classes = None self.namespaces = {self.tree.name: self.tree.base_iri} self.extra_namespaces = {} self._parse_class() - #print(self.attributes) + # print(self.attributes) self._parse_object_property() self._parse_data_property() self._recheck_namespaces() @@ -39,13 +38,12 @@ def __add__(self, ontoparser): - classes - attributes dict """ - for mainkey in ['class', 'object_property', 'data_property']: + for mainkey in ["class", "object_property", "data_property"]: if mainkey in ontoparser.attributes.keys(): for key, val in ontoparser.attributes[mainkey].items(): - self.attributes[mainkey][key] = val - + self.attributes[mainkey][key] = val - #now change classes + # now change classes if ontoparser.classes is not None: for clx in ontoparser.classes: self.classes.append(clx) @@ -60,28 +58,29 @@ def __add__(self, ontoparser): def __radd__(self, ontoparser): return self.__add__(ontoparser) - - def _strip_datatype(self, uri, delimiter='#'): + + def _strip_datatype(self, uri, delimiter="#"): uri_split = uri.split(delimiter) - return uri_split[-1] - + return uri_split[-1] + def _dict_to_lst(self, d): return [val for key, val in d.items()] - + def _get_subclasses(self, name): arg = self._in_which_bin_is_class(name) if arg is not None: return self.classes[arg] else: return [name] - + def _recheck_namespaces(self): for mainkey in self.attributes.keys(): for key, val in self.attributes[mainkey].items(): namespace = self.attributes[mainkey][key].namespace if namespace not in self.namespaces.keys(): - self.namespaces[namespace] = self.attributes[mainkey][key].namespace_with_prefix - + self.namespaces[namespace] = self.attributes[mainkey][ + key + ].namespace_with_prefix def _parse_data_property(self): for c in self.tree.data_properties(): @@ -91,47 +90,44 @@ def _parse_data_property(self): dm = [strip_name(d.iri, self.delimiter) for d in dm[0].Classes] except: dm = [strip_name(d.iri, self.delimiter) for d in dm] - - #now get subclasses + + # now get subclasses dm = [self._get_subclasses(d) for d in dm] dm = list(itertools.chain(*dm)) - + rn = c.range try: rn = [r.__name__ for r in rn[0].Classes if r is not None] except: rn = [r.__name__ for r in rn if r is not None] - - #Subproperties - #Commented out for now - #subprops = self.tree.search(subproperty_of=getattr(self.tree, c.name)) - #for subprop in subprops: + # Subproperties + # Commented out for now + # subprops = self.tree.search(subproperty_of=getattr(self.tree, c.name)) + # for subprop in subprops: # if subprop.iri != iri: # #print(subprop.iri) # pass - #PATCH - #Here: we patch specific items specifically for pyscal rdf - rn = patch_terms(iri, rn) + # PATCH + # Here: we patch specific items specifically for pyscal rdf + rn = patch_terms(iri, rn) - #print(iri, rn) - #print(iri, dm) + # print(iri, rn) + # print(iri, dm) term = OntoTerm(iri, delimiter=self.delimiter) - dm = [x.replace('07:owl#Thing', 'owl:Thing') for x in dm] + dm = [x.replace("07:owl#Thing", "owl:Thing") for x in dm] term.domain = dm term.range = rn - term.node_type = 'data_property' - self.attributes['data_property'][term.name] = term - #assign this data + term.node_type = "data_property" + self.attributes["data_property"][term.name] = term + # assign this data for d in dm: - if d!='owl:Thing': - self.attributes['class'][d].is_range_of.append(term.name) + if d != "owl:Thing": + self.attributes["class"][d].is_range_of.append(term.name) + # subproperties should be treated the same - #subproperties should be treated the same - - def _parse_object_property(self): for c in self.tree.object_properties(): iri = c.iri @@ -147,8 +143,8 @@ def _parse_object_property(self): else: dmnew.append(strip_name(d.iri, self.delimiter)) dm = dmnew - - #now get subclasses + + # now get subclasses dm = [self._get_subclasses(d) for d in dm] dm = list(itertools.chain(*dm)) @@ -157,32 +153,32 @@ def _parse_object_property(self): rn = [strip_name(r.iri, self.delimiter) for r in rn[0].Classes] except: rn = [strip_name(r.iri, self.delimiter) for r in rn] - - #now get subclasses + + # now get subclasses rn = [self._get_subclasses(d) for d in rn] rn = list(itertools.chain(*rn)) term = OntoTerm(iri, delimiter=self.delimiter) term.domain = dm term.range = rn - term.node_type = 'object_property' - self.attributes['object_property'][term.name] = term + term.node_type = "object_property" + self.attributes["object_property"][term.name] = term for d in dm: - if d!='07:owl#Thing': - if d in self.attributes['class']: - self.attributes['class'][d].is_range_of.append(term.name) + if d != "07:owl#Thing": + if d in self.attributes["class"]: + self.attributes["class"][d].is_range_of.append(term.name) for r in rn: - if r!='07:owl#Thing': - if d in self.attributes['class']: - self.attributes['class'][d].is_domain_of.append(term.name) - + if r != "07:owl#Thing": + if d in self.attributes["class"]: + self.attributes["class"][d].is_domain_of.append(term.name) + def _parse_class_basic(self): classes = [] for c in self.tree.classes(): iri = c.iri - #print(iri) - #print(iri) - #CHILDREN + # print(iri) + # print(iri) + # CHILDREN children = self.tree.get_children_of(c) named_instances = self.tree.get_instances_of(c) equiv_classes = c.equivalent_to @@ -190,13 +186,12 @@ def _parse_class_basic(self): subclasses.append(c) for sb in subclasses: term = OntoTerm(sb.iri, delimiter=self.delimiter) - term.node_type ='class' - self.attributes['class'][term.name] = term + term.node_type = "class" + self.attributes["class"][term.name] = term subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses] classes.append(subclasses) - - #try: + # try: # subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name)) # for sb in subclasses: # term = OntoTerm(sb.iri, delimiter=self.delimiter) @@ -204,30 +199,30 @@ def _parse_class_basic(self): # self.attributes['class'][term.name] = term # subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses] # classes.append(subclasses) - #except: + # except: # term = OntoTerm(c.iri, delimiter=self.delimiter) # term.node_type ='class' - # self.attributes['class'][term.name] = term + # self.attributes['class'][term.name] = term # classes.append([strip_name(c.iri, self.delimiter)]) return classes - + def _aggregate_keys(self, dd): lst = copy.deepcopy(dd) - #choose the first list + # choose the first list large_list = [] start = lst[0] - #delete it from the main list + # delete it from the main list nruns = len(lst) del lst[0] - #now loop, if there is intersection add to this list + # now loop, if there is intersection add to this list while True: found = False index_to_delete = [] for count, ls in enumerate(lst): common = len(list(set(start) & set(ls))) - #print(common) - if common>0: - #common elements found! merge them + # print(common) + if common > 0: + # common elements found! merge them for l in ls: start.append(l) found = True @@ -237,26 +232,22 @@ def _aggregate_keys(self, dd): del lst[ii] else: large_list.append(np.unique(start)) - if len(lst)==0: + if len(lst) == 0: break else: start = lst[0] del lst[0] return large_list - + def _parse_class(self): sub_classes = self._parse_class_basic() - #now we have to go through and clean up sub classes + # now we have to go through and clean up sub classes sub_classes = self._aggregate_keys(sub_classes) self.classes = sub_classes - + def _in_which_bin_is_class(self, name): for count, lst in enumerate(self.classes): if name in lst: return count else: return None - - - - \ No newline at end of file diff --git a/atomrdf/network/patch.py b/atomrdf/network/patch.py index 4047315..6197e73 100644 --- a/atomrdf/network/patch.py +++ b/atomrdf/network/patch.py @@ -7,22 +7,22 @@ import os + def patch_terms(iri, rn): """ Remove functions as patching is done """ - #Term: hasSymbol - #Ontology: CMSO - #Reason: Range is not specified in the owl file. - #This prevents owlready2 from reading in this property correctly. - if iri == 'http://purls.helmholtz-metadaten.de/cmso/hasSymbol': - rn = ['str'] - #Term: hasValue - #Ontology: CMSO - #Reason: Range is Literal(); however here we use this for number values, hence we can fix this. - #See fn: `add_calculated_property` - elif iri == 'http://purls.helmholtz-metadaten.de/cmso/hasValue': - rn = ['float'] - - return rn + # Term: hasSymbol + # Ontology: CMSO + # Reason: Range is not specified in the owl file. + # This prevents owlready2 from reading in this property correctly. + if iri == "http://purls.helmholtz-metadaten.de/cmso/hasSymbol": + rn = ["str"] + # Term: hasValue + # Ontology: CMSO + # Reason: Range is Literal(); however here we use this for number values, hence we can fix this. + # See fn: `add_calculated_property` + elif iri == "http://purls.helmholtz-metadaten.de/cmso/hasValue": + rn = ["float"] + return rn diff --git a/atomrdf/network/term.py b/atomrdf/network/term.py index 6fc0f7e..a2a61b5 100644 --- a/atomrdf/network/term.py +++ b/atomrdf/network/term.py @@ -1,9 +1,11 @@ """ https://docs.python.org/3/library/operator.html """ + from rdflib import Namespace import numbers + def _get_name(uri, delimiter): """ Just get name with namespace prefix @@ -12,27 +14,29 @@ def _get_name(uri, delimiter): name = uri_split[-1] return name + def _get_namespace(uri, delimiter): if delimiter == "/": uri_split = uri.split(delimiter) - if len(uri_split)>1: + if len(uri_split) > 1: namespace = uri_split[-2] else: - namespace = uri + namespace = uri else: uri_split = uri.split(delimiter) uri_split = uri_split[0].split("/") - if len(uri_split)>0: + if len(uri_split) > 0: namespace = uri_split[-1] else: namespace = uri - return namespace + return namespace -def strip_name(uri, delimiter, get_what='name', namespace=None): + +def strip_name(uri, delimiter, get_what="name", namespace=None): if get_what == "namespace": return _get_namespace(uri, delimiter) - - elif get_what == "name": + + elif get_what == "name": if namespace is None: namespace = _get_namespace(uri, delimiter) name = _get_name(uri, delimiter) @@ -40,13 +44,17 @@ def strip_name(uri, delimiter, get_what='name', namespace=None): class OntoTerm: - def __init__(self, uri, - namespace=None, - node_type=None, - dm=[], rn=[], - data_type=None, - node_id=None, - delimiter='/'): + def __init__( + self, + uri, + namespace=None, + node_type=None, + dm=[], + rn=[], + data_type=None, + node_id=None, + delimiter="/", + ): """ This is class that represents an ontology element. @@ -57,17 +65,17 @@ def __init__(self, uri, namespace: string, optional if provided this will be used as namespace - + """ self.uri = uri - #type: can be object property, data property, or class + # type: can be object property, data property, or class self.node_type = node_type - #now we need domain and range + # now we need domain and range self.domain = dm self.range = rn - #datatype, that is only need for data properties + # datatype, that is only need for data properties self.data_type = data_type - #identifier + # identifier self.node_id = node_id self.subclasses = [] self.subproperties = [] @@ -76,42 +84,41 @@ def __init__(self, uri, self.is_range_of = [] self._condition = None self._namespace = namespace - #name of the class + # name of the class self._name = None - - @property def uri(self): return self._uri - + @uri.setter def uri(self, val): self._uri = val - + @property def name_without_prefix(self): name = _get_name(self.uri, self.delimiter) - name = name.replace('–', '') - name = name.replace('-', '') + name = name.replace("–", "") + name = name.replace("-", "") return name @property def name(self): - return strip_name(self.uri, self.delimiter, - namespace=self._namespace, get_what="name") + return strip_name( + self.uri, self.delimiter, namespace=self._namespace, get_what="name" + ) @property def namespace(self): if self._namespace is not None: return self._namespace else: - return strip_name(self.uri, self.delimiter, get_what="namespace") + return strip_name(self.uri, self.delimiter, get_what="namespace") @property def namespace_with_prefix(self): uri_split = self.uri.split(self.delimiter) - if len(uri_split)>1: + if len(uri_split) > 1: namespace = self.delimiter.join(uri_split[:-1]) + self.delimiter return namespace else: @@ -120,7 +127,7 @@ def namespace_with_prefix(self): @property def namespace_object(self): uri_split = self.uri.split(self.delimiter) - if len(uri_split)>1: + if len(uri_split) > 1: namespace = self.delimiter.join(uri_split[:-1]) + self.delimiter prop = uri_split[-1] return getattr(Namespace(namespace), prop) @@ -152,14 +159,16 @@ def __repr__(self): return str(self.name) def _clean_datatype(self, r): - if r=='str': - return 'string' + if r == "str": + return "string" return r - #convenience methods for overload checking + # convenience methods for overload checking def _ensure_condition_exists(self): if self._condition is None: - raise ValueError("Individual terms should have condition for this operation!") + raise ValueError( + "Individual terms should have condition for this operation!" + ) def _is_term(self, val): if not isinstance(val, OntoTerm): @@ -168,20 +177,22 @@ def _is_term(self, val): def _is_number(self, val): if not isinstance(val, numbers.Number): raise TypeError("can only be performed with a number!") - + def _is_data_node(self): if not self.node_type == "data_property": - raise TypeError("This operation can only be performed with a data property!") + raise TypeError( + "This operation can only be performed with a data property!" + ) def _create_condition_string(self, condition, val): - return f'(?{self.query_name_without_prefix}{condition}\"{val}\"^^xsd:{self._clean_datatype(self.range[0])})' - - #overloading operators + return f'(?{self.query_name_without_prefix}{condition}"{val}"^^xsd:{self._clean_datatype(self.range[0])})' + + # overloading operators def __eq__(self, val): """ - = + = """ - #self._is_number(val) + # self._is_number(val) self._is_data_node() self._condition = self._create_condition_string("=", val) return self @@ -198,19 +209,18 @@ def __le__(self, val): self._condition = self._create_condition_string("<=", val) return self - def __ne__(self, val): - #self._is_number(val) + # self._is_number(val) self._is_data_node() self._condition = self._create_condition_string("!=", val) return self - + def __ge__(self, val): self._is_number(val) self._is_data_node() self._condition = self._create_condition_string(">=", val) return self - + def __gt__(self, val): self._is_number(val) self._is_data_node() @@ -222,9 +232,9 @@ def __and__(self, term): self._is_data_node() term._is_data_node() self._ensure_condition_exists() - term._ensure_condition_exists() + term._ensure_condition_exists() self._condition = "&&".join([self._condition, term._condition]) - self._condition = f'({self._condition})' + self._condition = f"({self._condition})" return self def and_(self, term): @@ -235,10 +245,10 @@ def __or__(self, term): self._is_data_node() term._is_data_node() self._ensure_condition_exists() - term._ensure_condition_exists() + term._ensure_condition_exists() self._condition = "||".join([self._condition, term._condition]) - self._condition = f'({self._condition})' + self._condition = f"({self._condition})" return self def or_(self, term): - self.__or__(term) \ No newline at end of file + self.__or__(term) diff --git a/atomrdf/properties.py b/atomrdf/properties.py index 16c493f..ce2bbd5 100644 --- a/atomrdf/properties.py +++ b/atomrdf/properties.py @@ -2,7 +2,7 @@ import spglib # DATADICT properties -#------------------------------------------ +# ------------------------------------------ bravais_lattice_dict = { "l12": "https://www.wikidata.org/wiki/Q3006714", "b2": "https://www.wikidata.org/wiki/Q851536", @@ -13,59 +13,87 @@ "fcc": "https://www.wikidata.org/wiki/Q3006714", } + # SIMCELL properties -#-------------------------------------------- +# -------------------------------------------- def get_chemical_composition(system): return system.composition + def get_cell_volume(system): return system.volume + def get_number_of_atoms(system): return system.natoms + def get_simulation_cell_length(system): return system.box_dimensions + def get_simulation_cell_vector(system): return system.box + def get_simulation_cell_angle(system): - return [_get_angle(system.box[0], system.box[1]), - _get_angle(system.box[1], system.box[2]), - _get_angle(system.box[2], system.box[0])] + return [ + _get_angle(system.box[0], system.box[1]), + _get_angle(system.box[1], system.box[2]), + _get_angle(system.box[2], system.box[0]), + ] + # LATTICE properties -#-------------------------------------------- +# -------------------------------------------- + def get_lattice_angle(system): if system._structure_dict is None: return [None, None, None] if "box" in system._structure_dict.keys(): - return [_get_angle(system._structure_dict["box"][0], system._structure_dict["box"][1]), - _get_angle(system._structure_dict["box"][1], system._structure_dict["box"][2]), - _get_angle(system._structure_dict["box"][2], system._structure_dict["box"][0])] + return [ + _get_angle( + system._structure_dict["box"][0], system._structure_dict["box"][1] + ), + _get_angle( + system._structure_dict["box"][1], system._structure_dict["box"][2] + ), + _get_angle( + system._structure_dict["box"][2], system._structure_dict["box"][0] + ), + ] else: return [None, None, None] + def get_lattice_parameter(system): if system.atoms._lattice_constant is None: return [None, None, None] else: if system._structure_dict is not None: if "box" in system._structure_dict.keys(): - return [np.linalg.norm(system._structure_dict["box"][0])*system.atoms._lattice_constant, - np.linalg.norm(system._structure_dict["box"][1])*system.atoms._lattice_constant, - np.linalg.norm(system._structure_dict["box"][2])*system.atoms._lattice_constant] - return [system.atoms._lattice_constant, - system.atoms._lattice_constant, - system.atoms._lattice_constant] + return [ + np.linalg.norm(system._structure_dict["box"][0]) + * system.atoms._lattice_constant, + np.linalg.norm(system._structure_dict["box"][1]) + * system.atoms._lattice_constant, + np.linalg.norm(system._structure_dict["box"][2]) + * system.atoms._lattice_constant, + ] + return [ + system.atoms._lattice_constant, + system.atoms._lattice_constant, + system.atoms._lattice_constant, + ] + def get_crystal_structure_name(system): if system._structure_dict is None: return None return system.atoms._lattice + def get_bravais_lattice(system): if system._structure_dict is None: return None @@ -73,6 +101,7 @@ def get_bravais_lattice(system): return bravais_lattice_dict[system.atoms._lattice] return None + def get_basis_positions(system): if system._structure_dict is None: return None @@ -80,7 +109,8 @@ def get_basis_positions(system): return system._structure_dict["positions"] return None -#def get_basis_occupancy(system): + +# def get_basis_occupancy(system): # if system._structure_dict is None: # return None @@ -88,12 +118,13 @@ def get_basis_positions(system): # occ_numbers = system._structure_dict['species'] # tdict = system.atoms._type_dict # vals = [val for key, val in tdict.items()] - + # if vals[0] is not None: # occ_numbers = [tdict[x] for x in occ_numbers] # return occ_numbers # return None + def get_lattice_vector(system): if system._structure_dict is None: return [None, None, None] @@ -101,6 +132,7 @@ def get_lattice_vector(system): return system._structure_dict["box"] return [None, None, None] + def get_spacegroup_symbol(system): if system._structure_dict is None: return None @@ -110,6 +142,7 @@ def get_spacegroup_symbol(system): except: return None + def get_spacegroup_number(system): if system._structure_dict is None: return None @@ -119,47 +152,53 @@ def get_spacegroup_number(system): except: return None + # ATOM attributes -#-------------------------------------------- +# -------------------------------------------- def get_position(system): return system.atoms.positions + def get_species(system): return system.atoms.species - # SUPPORT functions -#-------------------------------------------- +# -------------------------------------------- def _get_angle(vec1, vec2): """ Get angle between two vectors in degrees - + Parameters ---------- vec1: list first vector - + vec2: list second vector - + Returns ------- angle: float angle in degrees - + Notes ----- Angle is rounded to two decimal points - + """ - return np.round(np.arccos(np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2)))*180/np.pi, decimals=2) + return np.round( + np.arccos(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))) + * 180 + / np.pi, + decimals=2, + ) + def _get_symmetry_dict(system): box = get_lattice_vector(system) direct_coordinates = get_basis_positions(system) - atom_types = system._structure_dict['species'] + atom_types = system._structure_dict["species"] - results = spglib.get_symmetry_dataset((box, - direct_coordinates, atom_types)) - return results["international"], results["number"] + results = spglib.get_symmetry_dataset((box, direct_coordinates, atom_types)) + return results["international"], results["number"] diff --git a/atomrdf/stores.py b/atomrdf/stores.py index 9b4e08d..97740f1 100644 --- a/atomrdf/stores.py +++ b/atomrdf/stores.py @@ -3,23 +3,40 @@ from rdflib import Graph, Literal import os -#special methods; for supporting workflow envs + +# special methods; for supporting workflow envs from atomrdf.workflow import inform_graph -def create_store(kg, store, identifier, - store_file=None, - structure_store=None): +def create_store(kg, store, identifier, store_file=None, structure_store=None): kg.store_file = store_file - if store == 'Memory': - store_memory(kg, store, identifier, store_file=store_file, structure_store=structure_store) - elif store == 'SQLAlchemy': - store_alchemy(kg, store, identifier, store_file=store_file, structure_store=structure_store) - elif type(store).__name__ == 'Project': - store_pyiron(kg, store, identifier, store_file=store_file, structure_store=structure_store) + if store == "Memory": + store_memory( + kg, + store, + identifier, + store_file=store_file, + structure_store=structure_store, + ) + elif store == "SQLAlchemy": + store_alchemy( + kg, + store, + identifier, + store_file=store_file, + structure_store=structure_store, + ) + elif type(store).__name__ == "Project": + store_pyiron( + kg, + store, + identifier, + store_file=store_file, + structure_store=structure_store, + ) else: - raise ValueError('Unknown store found!') + raise ValueError("Unknown store found!") def store_memory(kg, store, identifier, store_file=None, structure_store=None): @@ -27,38 +44,43 @@ def store_memory(kg, store, identifier, store_file=None, structure_store=None): kg.graph = graph kg.structure_store = _setup_structure_store(structure_store=structure_store) + def store_alchemy(kg, store, identifier, store_file=None, structure_store=None): _check_if_sqlalchemy_is_available() if store_file is None: raise ValueError("store file is needed if store is not memory") - kg.graph = Graph(store="SQLAlchemy", identifier=identifier) + kg.graph = Graph(store="SQLAlchemy", identifier=identifier) uri = Literal(f"sqlite:///{store_file}") kg.graph.open(uri, create=True) kg.structure_store = _setup_structure_store(structure_store=structure_store) -def store_pyiron(kg, store, identifier, store_file=None, structure_store=None): - structure_store = os.path.join(store.path, 'rdf_structure_store') +def store_pyiron(kg, store, identifier, store_file=None, structure_store=None): + structure_store = os.path.join(store.path, "rdf_structure_store") kg.structure_store = _setup_structure_store(structure_store=structure_store) - store_file = os.path.join(store.path, f'{store.name}.db') - store_alchemy(kg, store, identifier, store_file, structure_store=structure_store) - #finally update project object + store_file = os.path.join(store.path, f"{store.name}.db") + store_alchemy(kg, store, identifier, store_file, structure_store=structure_store) + # finally update project object inform_graph(store, kg) + def _check_if_sqlalchemy_is_available(): try: import sqlalchemy as sa except ImportError: - raise RuntimeError('Please install the sqlalchemy package') + raise RuntimeError("Please install the sqlalchemy package") try: import rdflib_sqlalchemy as rsa except ImportError: - raise RuntimeError('Please install the rdllib-sqlalchemy package. The development version is needed, please do pip install git+https://github.com/RDFLib/rdflib-sqlalchemy.git@develop') + raise RuntimeError( + "Please install the rdllib-sqlalchemy package. The development version is needed, please do pip install git+https://github.com/RDFLib/rdflib-sqlalchemy.git@develop" + ) + def _setup_structure_store(structure_store=None): if structure_store is None: - structure_store = os.path.join(os.getcwd(), 'rdf_structure_store') + structure_store = os.path.join(os.getcwd(), "rdf_structure_store") if not os.path.exists(structure_store): os.mkdir(structure_store) return structure_store diff --git a/atomrdf/structure.py b/atomrdf/structure.py index 9a1f72e..fdf957c 100644 --- a/atomrdf/structure.py +++ b/atomrdf/structure.py @@ -2,6 +2,7 @@ StructureGraph is the central object in atomrdf which combines all the functionality of :py:class:`atomrdf.graph.RDFGraph` along with easy structural creation routines. """ + import numpy as np import copy from functools import partial, update_wrapper @@ -25,32 +26,37 @@ from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef from atomrdf.namespace import CMSO, PLDO, PODO -#read element data file -file_location = os.path.dirname(__file__).split('/') +# read element data file +file_location = os.path.dirname(__file__).split("/") file_location = "/".join(file_location[:-1]) -file_location = os.path.join(os.path.dirname(__file__), 'data/element.yml') -with open(file_location, 'r') as fin: +file_location = os.path.join(os.path.dirname(__file__), "data/element.yml") +with open(file_location, "r") as fin: element_indetifiers = yaml.safe_load(fin) -def _make_crystal(structure, - lattice_constant = 1.00, - repetitions = None, - ca_ratio = 1.633, - noise = 0, + +def _make_crystal( + structure, + lattice_constant=1.00, + repetitions=None, + ca_ratio=1.633, + noise=0, element=None, primitive=False, graph=None, - names=False): - - atoms, box, sdict = pcs.make_crystal(structure, + names=False, +): + + atoms, box, sdict = pcs.make_crystal( + structure, lattice_constant=lattice_constant, - repetitions=repetitions, + repetitions=repetitions, ca_ratio=ca_ratio, - noise=noise, - element=element, + noise=noise, + element=element, return_structure_dict=True, - primitive=primitive) - + primitive=primitive, + ) + s = System(graph=graph, names=names) s.box = box s.atoms = atoms @@ -60,109 +66,123 @@ def _make_crystal(structure, s.to_graph() return s -def _make_general_lattice(positions, - types, + +def _make_general_lattice( + positions, + types, box, - lattice_constant = 1.00, - repetitions = None, - noise = 0, + lattice_constant=1.00, + repetitions=None, + noise=0, element=None, graph=None, - names=False): + names=False, +): - atoms, box, sdict = pcs.general_lattice(positions, + atoms, box, sdict = pcs.general_lattice( + positions, types, box, lattice_constant=lattice_constant, repetitions=repetitions, noise=noise, element=element, - return_structure_dict=True) + return_structure_dict=True, + ) s = System(graph=graph, names=names) s.box = box s.atoms = atoms - s.atoms._lattice = 'custom' + s.atoms._lattice = "custom" s.atoms._lattice_constant = lattice_constant s._structure_dict = sdict s.to_graph() return s -def _make_dislocation(burgers_vector, + +def _make_dislocation( + burgers_vector, slip_vector, dislocation_line, elastic_constant_dict, - dislocation_type='monopole', - structure=None, + dislocation_type="monopole", + structure=None, element=None, - lattice_constant = 1.00, - repetitions = None, - ca_ratio = 1.633, - noise = 0, + lattice_constant=1.00, + repetitions=None, + ca_ratio=1.633, + noise=0, primitive=False, graph=None, - names=False - ): - + names=False, +): + from atomman.defect.Dislocation import Dislocation import atomman as am import atomman.unitconvert as uc - + if structure is not None: - #create a structure with the info - input_structure = _make_crystal(structure, - lattice_constant = lattice_constant, - repetitions = repetitions, - ca_ratio = ca_ratio, - noise = noise, - element=element, - primitive=primitive) + # create a structure with the info + input_structure = _make_crystal( + structure, + lattice_constant=lattice_constant, + repetitions=repetitions, + ca_ratio=ca_ratio, + noise=noise, + element=element, + primitive=primitive, + ) elif element is not None: if element in element_dict.keys(): - structure = element_dict[element]['structure'] - lattice_constant=element_dict[element]['lattice_constant'] + structure = element_dict[element]["structure"] + lattice_constant = element_dict[element]["lattice_constant"] else: - raise ValueError('Please provide structure') - input_structure = _make_crystal(structure, - lattice_constant = lattice_constant, - repetitions = repetitions, - ca_ratio = ca_ratio, - noise = noise, - element=element, - primitive=primitive) + raise ValueError("Please provide structure") + input_structure = _make_crystal( + structure, + lattice_constant=lattice_constant, + repetitions=repetitions, + ca_ratio=ca_ratio, + noise=noise, + element=element, + primitive=primitive, + ) else: - raise ValueError('Provide either structure or element') + raise ValueError("Provide either structure or element") - #create the elastic constant object - #possible_keys = ["C11", "C12", "C13", "C14", "C15", "C16", + # create the elastic constant object + # possible_keys = ["C11", "C12", "C13", "C14", "C15", "C16", # "C22", "C23", "C24", "C25", "C26", # "C33", "C34", "C35", "C36", # "C44", "C45", "C46", # "C55", "C56", # "C66"] for key, val in elastic_constant_dict.items(): - elastic_constant_dict[key] = uc.set_in_units(val, 'GPa') + elastic_constant_dict[key] = uc.set_in_units(val, "GPa") C = am.ElasticConstants(**elastic_constant_dict) - box = am.Box(avect=input_structure.box[0], - bvect=input_structure.box[1], - cvect=input_structure.box[2]) - atoms = am.Atoms(atype=input_structure.atoms.types, - pos=input_structure.atoms.positions) - system = am.System(atoms=atoms, - box=box, - pbc=[True, True, True], - symbols=element, - scale=False) - - disc = Dislocation(system, - C, - burgers_vector, - dislocation_line, - slip_vector,) - if dislocation_type == 'monopole': + box = am.Box( + avect=input_structure.box[0], + bvect=input_structure.box[1], + cvect=input_structure.box[2], + ) + atoms = am.Atoms( + atype=input_structure.atoms.types, pos=input_structure.atoms.positions + ) + system = am.System( + atoms=atoms, box=box, pbc=[True, True, True], symbols=element, scale=False + ) + + disc = Dislocation( + system, + C, + burgers_vector, + dislocation_line, + slip_vector, + ) + if dislocation_type == "monopole": disl_system = disc.monopole() - elif dislocation_type == 'periodicarray': + elif dislocation_type == "periodicarray": disl_system = disc.periodicarray() box = [disl_system.box.avect, disl_system.box.bvect, disl_system.box.cvect] @@ -171,12 +191,14 @@ def _make_dislocation(burgers_vector, if element is not None: species = [] for t in types: - species.append(element[int(t)-1]) + species.append(element[int(t) - 1]) else: species = [None for x in range(len(types))] - positions = np.column_stack((atom_df['pos[0]'].values, atom_df['pos[1]'].values, atom_df['pos[2]'].values)) - atom_dict = {'positions': positions, 'types':types, 'species': species} + positions = np.column_stack( + (atom_df["pos[0]"].values, atom_df["pos[1]"].values, atom_df["pos[2]"].values) + ) + atom_dict = {"positions": positions, "types": types, "species": species} atom_obj = Atoms() atom_obj.from_dict(atom_dict) output_structure = System() @@ -185,29 +207,34 @@ def _make_dislocation(burgers_vector, output_structure = output_structure.modify.remap_to_box() return output_structure -def _make_grain_boundary(axis, - sigma, gb_plane, - structure = None, - element = None, - lattice_constant = 1, - repetitions = (1,1,1), + +def _make_grain_boundary( + axis, + sigma, + gb_plane, + structure=None, + element=None, + lattice_constant=1, + repetitions=(1, 1, 1), overlap=0.0, graph=None, - names=False): + names=False, +): gb = GrainBoundary() - gb.create_grain_boundary(axis=axis, sigma=sigma, - gb_plane=gb_plane) + gb.create_grain_boundary(axis=axis, sigma=sigma, gb_plane=gb_plane) if structure is not None: - atoms, box, sdict = gb.populate_grain_boundary(structure, - repetitions = repetitions, - lattice_parameter = lattice_constant, - overlap=overlap) + atoms, box, sdict = gb.populate_grain_boundary( + structure, + repetitions=repetitions, + lattice_parameter=lattice_constant, + overlap=overlap, + ) elif element is not None: - atoms, box, sdict = gb.populate_grain_boundary(element, - repetitions=repetitions, - overlap=overlap) + atoms, box, sdict = gb.populate_grain_boundary( + element, repetitions=repetitions, overlap=overlap + ) s = System(graph=graph, names=names) s.box = box s.atoms = atoms @@ -215,25 +242,28 @@ def _make_grain_boundary(axis, s.atoms._lattice_constant = lattice_constant s._structure_dict = sdict s.to_graph() - gb_dict = {"GBPlane": " ".join(np.array(gb_plane).astype(str)), - "RotationAxis": axis, - "MisorientationAngle": gb.theta, - "GBType": gb.find_gb_character(), - "sigma": gb.sigma, - } + gb_dict = { + "GBPlane": " ".join(np.array(gb_plane).astype(str)), + "RotationAxis": axis, + "MisorientationAngle": gb.theta, + "GBType": gb.find_gb_character(), + "sigma": gb.sigma, + } s.add_gb(gb_dict) return s -def _read_structure(filename, - format="lammps-dump", - graph=None, - names=False, - species=None, - lattice=None, - lattice_constant=None, - basis_box=None, - basis_positions=None, - ): + +def _read_structure( + filename, + format="lammps-dump", + graph=None, + names=False, + species=None, + lattice=None, + lattice_constant=None, + basis_box=None, + basis_positions=None, +): """ Read in structure from file or ase object @@ -268,7 +298,7 @@ def _read_structure(filename, basis_positions: nX3 list, optional specify the relative positions of atoms in the unit cell. Not required if lattice is provided - + Returns ------- Structure @@ -276,38 +306,51 @@ def _read_structure(filename, datadict = {} if lattice is not None: if lattice in structure_dict.keys(): - datadict = structure_dict[lattice]['conventional'] - datadict['lattice'] = lattice + datadict = structure_dict[lattice]["conventional"] + datadict["lattice"] = lattice if lattice_constant is not None: - datadict['lattice_constant'] = lattice_constant + datadict["lattice_constant"] = lattice_constant if basis_box is not None: - datadict['box'] = basis_box + datadict["box"] = basis_box if basis_positions is not None: - datadict['positions'] = basis_positions - - s = System(filename, format=format, species=species, - graph=graph, names=names, warn_read_in=False) + datadict["positions"] = basis_positions + + s = System( + filename, + format=format, + species=species, + graph=graph, + names=names, + warn_read_in=False, + ) s.lattice_properties = datadict s.to_graph() - return s + return s + class System(pc.System): create = AttrSetter() - #create.head = pcs + # create.head = pcs mapdict = {} mapdict["lattice"] = {} for key in structure_dict.keys(): - mapdict["lattice"][key] = update_wrapper(partial(_make_crystal, key), - _make_crystal) + mapdict["lattice"][key] = update_wrapper( + partial(_make_crystal, key), _make_crystal + ) mapdict["lattice"]["custom"] = _make_general_lattice mapdict["element"] = {} for key in element_dict.keys(): - mapdict["element"][key] = update_wrapper(partial(_make_crystal, - element_dict[key]['structure'], - lattice_constant=element_dict[key]['lattice_constant'], - element = key), pcs.make_crystal) + mapdict["element"][key] = update_wrapper( + partial( + _make_crystal, + element_dict[key]["structure"], + lattice_constant=element_dict[key]["lattice_constant"], + element=key, + ), + pcs.make_crystal, + ) mapdict["defect"] = {} mapdict["defect"]["grain_boundary"] = _make_grain_boundary @@ -316,98 +359,115 @@ class System(pc.System): read = AttrSetter() mapdict = {} - mapdict['file'] = _read_structure - mapdict['ase'] = update_wrapper(partial(_read_structure, format='ase'), _read_structure) + mapdict["file"] = _read_structure + mapdict["ase"] = update_wrapper( + partial(_read_structure, format="ase"), _read_structure + ) read._add_attribute(mapdict) - def __init__(self, filename = None, - format = "lammps-dump", - compressed = False, - customkeys = None, - species = None, - source=None, - graph=None, - names=False, - warn_read_in=True): - + def __init__( + self, + filename=None, + format="lammps-dump", + compressed=False, + customkeys=None, + species=None, + source=None, + graph=None, + names=False, + warn_read_in=True, + ): + if (filename is not None) and warn_read_in: - warnings.warn('To provide additional information, use the System.read.file method') - - super().__init__(filename = filename, - format = format, - compressed = compressed, - customkeys = customkeys, - species = species) - - #this is the sample which will be stored + warnings.warn( + "To provide additional information, use the System.read.file method" + ) + + super().__init__( + filename=filename, + format=format, + compressed=compressed, + customkeys=customkeys, + species=species, + ) + + # this is the sample which will be stored self.sample = None - #the graph object should also be attached - #for post-processing of structures + # the graph object should also be attached + # for post-processing of structures self.graph = graph self.names = names self._atom_ids = None if source is not None: self.__dict__.update(source.__dict__) - #assign attributes + # assign attributes self.schema = AttrSetter() mapdict = { - "material": { - "element_ratio": partial(prp.get_chemical_composition, self), - "crystal_structure": { - "name": partial(prp.get_crystal_structure_name, self), - "spacegroup_symbol": partial(prp.get_spacegroup_symbol, self), - "spacegroup_number": partial(prp.get_spacegroup_number, self), - "unit_cell": { - "bravais_lattice": partial(prp.get_bravais_lattice, self), - "lattice_parameter": partial(prp.get_lattice_parameter, self), - "angle": partial(prp.get_lattice_angle, self), - }, - }, + "material": { + "element_ratio": partial(prp.get_chemical_composition, self), + "crystal_structure": { + "name": partial(prp.get_crystal_structure_name, self), + "spacegroup_symbol": partial(prp.get_spacegroup_symbol, self), + "spacegroup_number": partial(prp.get_spacegroup_number, self), + "unit_cell": { + "bravais_lattice": partial(prp.get_bravais_lattice, self), + "lattice_parameter": partial(prp.get_lattice_parameter, self), + "angle": partial(prp.get_lattice_angle, self), + }, + }, }, - "simulation_cell": { - "volume": partial(prp.get_cell_volume, self), - "number_of_atoms": partial(prp.get_number_of_atoms, self), - "length": partial(prp.get_simulation_cell_length, self), - "vector": partial(prp.get_simulation_cell_vector, self), - "angle": partial(prp.get_simulation_cell_angle, self), + "simulation_cell": { + "volume": partial(prp.get_cell_volume, self), + "number_of_atoms": partial(prp.get_number_of_atoms, self), + "length": partial(prp.get_simulation_cell_length, self), + "vector": partial(prp.get_simulation_cell_vector, self), + "angle": partial(prp.get_simulation_cell_angle, self), }, - "atom_attribute": { - "position": partial(prp.get_position, self), - "species": partial(prp.get_species, self), + "atom_attribute": { + "position": partial(prp.get_position, self), + "species": partial(prp.get_species, self), }, } self.schema._add_attribute(mapdict) def delete(self, ids=None, indices=None, condition=None, selection=False): - - masks = self.atoms._generate_bool_list(ids=ids, indices=indices, condition=condition, selection=selection) + + masks = self.atoms._generate_bool_list( + ids=ids, indices=indices, condition=condition, selection=selection + ) delete_list = [masks[self.atoms["head"][x]] for x in range(self.atoms.ntotal)] delete_ids = [x for x in range(self.atoms.ntotal) if delete_list[x]] actual_natoms = self.natoms self.atoms._delete_atoms(delete_ids) if self.graph is not None: - #first annotate graph - val = len([x for x in masks if x]) - c = (val/self.natoms) + # first annotate graph + val = len([x for x in masks if x]) + c = val / self.natoms self.add_vacancy(c, number=val) - #now we need to re-add atoms, so at to remove + # now we need to re-add atoms, so at to remove self.graph.remove((self.sample, CMSO.hasNumberOfAtoms, None)) - self.graph.add((self.sample, CMSO.hasNumberOfAtoms, Literal(actual_natoms-val, datatype=XSD.integer))) - #revamp composition - #remove existing chem composution - + self.graph.add( + ( + self.sample, + CMSO.hasNumberOfAtoms, + Literal(actual_natoms - val, datatype=XSD.integer), + ) + ) + # revamp composition + # remove existing chem composution + chemical_species = self.graph.value(self.sample, CMSO.hasSpecies) - #start by cleanly removing elements + # start by cleanly removing elements for s in self.graph.triples((chemical_species, CMSO.hasElement, None)): element = s[2] self.graph.remove((element, None, None)) self.graph.remove((chemical_species, None, None)) self.graph.remove((self.sample, CMSO.hasSpecies, None)) - - #now recalculate and add it again + + # now recalculate and add it again composition = self.schema.material.element_ratio() valid = False for e, r in composition.items(): @@ -416,61 +476,89 @@ def delete(self, ids=None, indices=None, condition=None, selection=False): break if valid: - chemical_species = self.graph.create_node(f'{self._name}_ChemicalSpecies', CMSO.ChemicalSpecies) + chemical_species = self.graph.create_node( + f"{self._name}_ChemicalSpecies", CMSO.ChemicalSpecies + ) self.graph.add((self.sample, CMSO.hasSpecies, chemical_species)) for e, r in composition.items(): if e in element_indetifiers.keys(): - element = self.graph.create_node(element_indetifiers[e], CMSO.ChemicalElement) + element = self.graph.create_node( + element_indetifiers[e], CMSO.ChemicalElement + ) self.graph.add((chemical_species, CMSO.hasElement, element)) - self.graph.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) - self.graph.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) - - #we also have to read in file and clean it up - filepath = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasPath).toPython() - position_identifier = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasIdentifier).toPython() - species_identifier = self.graph.value(URIRef(f'{self.sample}_Species'), CMSO.hasIdentifier).toPython() - - #clean up items + self.graph.add( + (element, CMSO.hasSymbol, Literal(e, datatype=XSD.string)) + ) + self.graph.add( + ( + element, + CMSO.hasElementRatio, + Literal(r, datatype=XSD.float), + ) + ) + + # we also have to read in file and clean it up + filepath = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasPath + ).toPython() + position_identifier = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasIdentifier + ).toPython() + species_identifier = self.graph.value( + URIRef(f"{self.sample}_Species"), CMSO.hasIdentifier + ).toPython() + + # clean up items datadict = { - position_identifier:{ + position_identifier: { "value": self.schema.atom_attribute.position(), - "label": "position", + "label": "position", }, - species_identifier:{ + species_identifier: { "value": self.schema.atom_attribute.species(), - "label": "species", + "label": "species", }, } - outfile = os.path.join(self.graph.structure_store, str(self._name).split(':')[-1]) - json_io.write_file(outfile, datadict) - - - def substitute_atoms(self, substitution_element, ids=None, indices=None, condition=None, selection=False): - masks = self.atoms._generate_bool_list(ids=ids, indices=indices, condition=condition, selection=selection) + outfile = os.path.join( + self.graph.structure_store, str(self._name).split(":")[-1] + ) + json_io.write_file(outfile, datadict) + + def substitute_atoms( + self, + substitution_element, + ids=None, + indices=None, + condition=None, + selection=False, + ): + masks = self.atoms._generate_bool_list( + ids=ids, indices=indices, condition=condition, selection=selection + ) delete_list = [masks[self.atoms["head"][x]] for x in range(self.atoms.ntotal)] delete_ids = [x for x in range(self.atoms.ntotal) if delete_list[x]] type_dict = self.atoms._type_dict - rtype_dict = {val:key for key,val in type_dict.items()} + rtype_dict = {val: key for key, val in type_dict.items()} if substitution_element in rtype_dict.keys(): atomtype = rtype_dict[substitution_element] else: - maxtype = max(self.atoms['types'])+1 - + maxtype = max(self.atoms["types"]) + 1 + for x in delete_ids: - self.atoms['species'][x] = substitution_element - self.atoms['types'][x] = maxtype + self.atoms["species"][x] = substitution_element + self.atoms["types"][x] = maxtype - #operate on the graph + # operate on the graph if self.graph is not None: chemical_species = self.graph.value(self.sample, CMSO.hasSpecies) - #start by cleanly removing elements + # start by cleanly removing elements for s in self.graph.triples((chemical_species, CMSO.hasElement, None)): element = s[2] self.graph.remove((element, None, None)) self.graph.remove((chemical_species, None, None)) self.graph.remove((self.sample, CMSO.hasSpecies, None)) - + composition = self.schema.material.element_ratio() valid = False for e, r in composition.items(): @@ -479,37 +567,58 @@ def substitute_atoms(self, substitution_element, ids=None, indices=None, conditi break if valid: - chemical_species = self.graph.create_node(f'{self._name}_ChemicalSpecies', CMSO.ChemicalSpecies) + chemical_species = self.graph.create_node( + f"{self._name}_ChemicalSpecies", CMSO.ChemicalSpecies + ) self.graph.add((self.sample, CMSO.hasSpecies, chemical_species)) for e, r in composition.items(): if e in element_indetifiers.keys(): - element = self.graph.create_node(element_indetifiers[e], CMSO.ChemicalElement) + element = self.graph.create_node( + element_indetifiers[e], CMSO.ChemicalElement + ) self.graph.add((chemical_species, CMSO.hasElement, element)) - self.graph.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) - self.graph.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) - - #we also have to read in file and clean it up - filepath = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasPath).toPython() - position_identifier = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasIdentifier).toPython() - species_identifier = self.graph.value(URIRef(f'{self.sample}_Species'), CMSO.hasIdentifier).toPython() - - #clean up items + self.graph.add( + (element, CMSO.hasSymbol, Literal(e, datatype=XSD.string)) + ) + self.graph.add( + ( + element, + CMSO.hasElementRatio, + Literal(r, datatype=XSD.float), + ) + ) + + # we also have to read in file and clean it up + filepath = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasPath + ).toPython() + position_identifier = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasIdentifier + ).toPython() + species_identifier = self.graph.value( + URIRef(f"{self.sample}_Species"), CMSO.hasIdentifier + ).toPython() + + # clean up items datadict = { - position_identifier:{ + position_identifier: { "value": self.schema.atom_attribute.position(), - "label": "position", + "label": "position", }, - species_identifier:{ + species_identifier: { "value": self.schema.atom_attribute.species(), - "label": "species", + "label": "species", }, } - outfile = os.path.join(self.graph.structure_store, str(self._name).split(':')[-1]) - json_io.write_file(outfile, datadict) - + outfile = os.path.join( + self.graph.structure_store, str(self._name).split(":")[-1] + ) + json_io.write_file(outfile, datadict) - def add_interstitial_impurities(self, element, void_type='tetrahedral', lattice_constant=None, threshold=0.01): + def add_interstitial_impurities( + self, element, void_type="tetrahedral", lattice_constant=None, threshold=0.01 + ): """ Add interstitial impurities to the System @@ -525,7 +634,7 @@ def add_interstitial_impurities(self, element, void_type='tetrahedral', lattice_ Returns ------- - System: + System: system with the added impurities Notes @@ -534,70 +643,77 @@ def add_interstitial_impurities(self, element, void_type='tetrahedral', lattice_ interstitials could affect the addition. """ if None in self.atoms.species: - raise ValueError('Assign species!') + raise ValueError("Assign species!") - if void_type == 'tetrahedral': + if void_type == "tetrahedral": element = np.atleast_1d(element) - self.find.neighbors(method='voronoi', cutoff=0.1) + self.find.neighbors(method="voronoi", cutoff=0.1) verts = self.unique_vertices randindex = np.random.randint(0, len(verts), len(element)) randpos = np.array(verts)[randindex] - - elif void_type == 'octahedral': + elif void_type == "octahedral": if lattice_constant is None: - if 'lattice_constant' in self.lattice_properties.keys(): - lattice_constant = self.lattice_properties['lattice_constant'] + if "lattice_constant" in self.lattice_properties.keys(): + lattice_constant = self.lattice_properties["lattice_constant"] else: - raise ValueError('lattice constant is needed for octahedral voids, please provide') + raise ValueError( + "lattice constant is needed for octahedral voids, please provide" + ) - cutoff = lattice_constant + threshold*2 - self.find.neighbors(method='cutoff', cutoff=cutoff) + cutoff = lattice_constant + threshold * 2 + self.find.neighbors(method="cutoff", cutoff=cutoff) octa_pos = [] for count, dist in enumerate(self.atoms.neighbors.distance): - diffs = np.abs(np.array(dist)-lattice_constant) - #print(diffs) - indices = np.where(diffs < 1E-2)[0] - #index_neighbor = np.array(self.atoms["neighbors"][count])[indices] - #real_indices = np.array(self.atoms.neighbors.index[count])[indices] - #create a dict - #index_dict = {str(x):y for x,y in zip(real_indices, ghost_indices)} + diffs = np.abs(np.array(dist) - lattice_constant) + # print(diffs) + indices = np.where(diffs < 1e-2)[0] + # index_neighbor = np.array(self.atoms["neighbors"][count])[indices] + # real_indices = np.array(self.atoms.neighbors.index[count])[indices] + # create a dict + # index_dict = {str(x):y for x,y in zip(real_indices, ghost_indices)} vector = np.array(self.atoms["diff"][count])[indices] - vector = self.atoms.positions[count] + vector/2 + vector = self.atoms.positions[count] + vector / 2 for vect in vector: vect = self.modify.remap_position_to_box(vect) - #print(vect) + # print(vect) octa_pos.append(vect) - + randindex = np.random.randint(0, len(octa_pos), len(element)) randpos = np.unique(octa_pos, axis=0)[randindex] - + if not len(randpos) == len(element): - raise ValueError('not enough octahedral positions found!') + raise ValueError("not enough octahedral positions found!") else: - raise ValueError('void_type can only be tetrahedral/octahedral') + raise ValueError("void_type can only be tetrahedral/octahedral") - #create new system with the atoms added - sysn = System(source=self.add_atoms({'positions': randpos, 'species':element})) - #attach graphs + # create new system with the atoms added + sysn = System(source=self.add_atoms({"positions": randpos, "species": element})) + # attach graphs sysn.sample = self.sample sysn.graph = self.graph - #now we have to verify the triples correctly and add them in + # now we have to verify the triples correctly and add them in if self.graph is not None: self.graph.remove((self.sample, CMSO.hasNumberOfAtoms, None)) - self.graph.add((self.sample, CMSO.hasNumberOfAtoms, Literal(sysn.natoms, datatype=XSD.integer))) - #revamp composition - #remove existing chem composution + self.graph.add( + ( + self.sample, + CMSO.hasNumberOfAtoms, + Literal(sysn.natoms, datatype=XSD.integer), + ) + ) + # revamp composition + # remove existing chem composution chemical_species = self.graph.value(self.sample, CMSO.hasSpecies) - #start by cleanly removing elements + # start by cleanly removing elements for s in self.graph.triples((chemical_species, CMSO.hasElement, None)): element = s[2] self.graph.remove((element, None, None)) self.graph.remove((chemical_species, None, None)) self.graph.remove((self.sample, CMSO.hasSpecies, None)) - + composition = sysn.schema.material.element_ratio() valid = False for e, r in composition.items(): @@ -606,42 +722,61 @@ def add_interstitial_impurities(self, element, void_type='tetrahedral', lattice_ break if valid: - chemical_species = self.graph.create_node(f'{self._name}_ChemicalSpecies', CMSO.ChemicalSpecies) + chemical_species = self.graph.create_node( + f"{self._name}_ChemicalSpecies", CMSO.ChemicalSpecies + ) self.graph.add((self.sample, CMSO.hasSpecies, chemical_species)) for e, r in composition.items(): if e in element_indetifiers.keys(): - element = self.graph.create_node(element_indetifiers[e], CMSO.ChemicalElement) + element = self.graph.create_node( + element_indetifiers[e], CMSO.ChemicalElement + ) self.graph.add((chemical_species, CMSO.hasElement, element)) - self.graph.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) - self.graph.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) - - #we also have to read in file and clean it up - filepath = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasPath).toPython() - position_identifier = self.graph.value(URIRef(f'{self.sample}_Position'), CMSO.hasIdentifier).toPython() - species_identifier = self.graph.value(URIRef(f'{self.sample}_Species'), CMSO.hasIdentifier).toPython() - - #clean up items + self.graph.add( + (element, CMSO.hasSymbol, Literal(e, datatype=XSD.string)) + ) + self.graph.add( + ( + element, + CMSO.hasElementRatio, + Literal(r, datatype=XSD.float), + ) + ) + + # we also have to read in file and clean it up + filepath = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasPath + ).toPython() + position_identifier = self.graph.value( + URIRef(f"{self.sample}_Position"), CMSO.hasIdentifier + ).toPython() + species_identifier = self.graph.value( + URIRef(f"{self.sample}_Species"), CMSO.hasIdentifier + ).toPython() + + # clean up items datadict = { - position_identifier:{ + position_identifier: { "value": sysn.schema.atom_attribute.position(), - "label": "position", + "label": "position", }, - species_identifier:{ + species_identifier: { "value": sysn.schema.atom_attribute.species(), - "label": "species", + "label": "species", }, } - outfile = os.path.join(self.graph.structure_store, str(self._name).split(':')[-1]) - json_io.write_file(outfile, datadict) - - return sysn + outfile = os.path.join( + self.graph.structure_store, str(self._name).split(":")[-1] + ) + json_io.write_file(outfile, datadict) + return sysn def __delitem__(self, val): if isinstance(val, int): val = [val] - #now the graph has to be updated accordingly + # now the graph has to be updated accordingly self.delete(indices=list(val)) def to_graph(self): @@ -657,14 +792,13 @@ def to_graph(self): self._add_crystal_structure() self._add_atoms() - def _generate_name(self, name_index=None): if self.names: if name_index is None: name_index = self.graph.n_samples + 1 - self._name = f'sample:{name_index}' + self._name = f"sample:{name_index}" else: - self._name = f'sample:{str(uuid.uuid4())}' + self._name = f"sample:{str(uuid.uuid4())}" def _add_sample(self): sample = self.graph.create_node(self._name, CMSO.AtomicScaleSample) @@ -682,7 +816,9 @@ def _add_material(self): Returns ------- """ - material = self.graph.create_node(f'{self._name}_Material', CMSO.CrystallineMaterial) + material = self.graph.create_node( + f"{self._name}_Material", CMSO.CrystallineMaterial + ) self.graph.add((self.sample, CMSO.hasMaterial, material)) self.material = material @@ -706,15 +842,27 @@ def _add_chemical_composition(self): break if valid: - chemical_species = self.graph.create_node(f'{self._name}_ChemicalSpecies', CMSO.ChemicalSpecies) + chemical_species = self.graph.create_node( + f"{self._name}_ChemicalSpecies", CMSO.ChemicalSpecies + ) self.graph.add((self.sample, CMSO.hasSpecies, chemical_species)) for e, r in composition.items(): if e in element_indetifiers.keys(): - element = self.graph.create_node(element_indetifiers[e], CMSO.ChemicalElement) + element = self.graph.create_node( + element_indetifiers[e], CMSO.ChemicalElement + ) self.graph.add((chemical_species, CMSO.hasElement, element)) - self.graph.add((element, CMSO.hasChemicalSymbol, Literal(e, datatype=XSD.string))) - self.graph.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) + self.graph.add( + ( + element, + CMSO.hasChemicalSymbol, + Literal(e, datatype=XSD.string), + ) + ) + self.graph.add( + (element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float)) + ) def _add_simulation_cell(self): """ @@ -729,17 +877,31 @@ def _add_simulation_cell(self): ------- """ - simulation_cell = self.graph.create_node(f'{self._name}_SimulationCell', CMSO.SimulationCell) + simulation_cell = self.graph.create_node( + f"{self._name}_SimulationCell", CMSO.SimulationCell + ) self.graph.add((self.sample, CMSO.hasSimulationCell, simulation_cell)) - self.graph.add((simulation_cell, CMSO.hasVolume, - Literal(np.round(self.schema.simulation_cell.volume(), decimals=2), - datatype=XSD.float))) - self.graph.add((self.sample, CMSO.hasNumberOfAtoms, - Literal(self.schema.simulation_cell.number_of_atoms(), - datatype=XSD.integer))) + self.graph.add( + ( + simulation_cell, + CMSO.hasVolume, + Literal( + np.round(self.schema.simulation_cell.volume(), decimals=2), + datatype=XSD.float, + ), + ) + ) + self.graph.add( + ( + self.sample, + CMSO.hasNumberOfAtoms, + Literal( + self.schema.simulation_cell.number_of_atoms(), datatype=XSD.integer + ), + ) + ) self.simulation_cell = simulation_cell - - + def _add_simulation_cell_properties(self): """ Add a CMSO SimulationCell properties such as SimulationCellLength, @@ -753,40 +915,145 @@ def _add_simulation_cell_properties(self): Returns ------- """ - simulation_cell_length = self.graph.create_node(f'{self._name}_SimulationCellLength', CMSO.SimulationCellLength) + simulation_cell_length = self.graph.create_node( + f"{self._name}_SimulationCellLength", CMSO.SimulationCellLength + ) self.graph.add((self.simulation_cell, CMSO.hasLength, simulation_cell_length)) data = self.schema.simulation_cell.length() - self.graph.add((simulation_cell_length, CMSO.hasLength_x, Literal(data[0], datatype=XSD.float))) - self.graph.add((simulation_cell_length, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) - self.graph.add((simulation_cell_length, CMSO.hasLength_z, Literal(data[2], datatype=XSD.float))) - - simulation_cell_vector_01 = self.graph.create_node(f'{self._name}_SimulationCellVector_1', CMSO.SimulationCellVector) + self.graph.add( + ( + simulation_cell_length, + CMSO.hasLength_x, + Literal(data[0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_length, + CMSO.hasLength_y, + Literal(data[1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_length, + CMSO.hasLength_z, + Literal(data[2], datatype=XSD.float), + ) + ) + + simulation_cell_vector_01 = self.graph.create_node( + f"{self._name}_SimulationCellVector_1", CMSO.SimulationCellVector + ) data = self.schema.simulation_cell.vector() - self.graph.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_01)) - self.graph.add((simulation_cell_vector_01, CMSO.hasComponent_x, Literal(data[0][0], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_01, CMSO.hasComponent_y, Literal(data[0][1], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_01, CMSO.hasComponent_z, Literal(data[0][2], datatype=XSD.float))) - - simulation_cell_vector_02 = self.graph.create_node(f'{self._name}_SimulationCellVector_2', CMSO.SimulationCellVector) - self.graph.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_02)) - self.graph.add((simulation_cell_vector_02, CMSO.hasComponent_x, Literal(data[1][0], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_02, CMSO.hasComponent_y, Literal(data[1][1], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_02, CMSO.hasComponent_z, Literal(data[1][2], datatype=XSD.float))) - - simulation_cell_vector_03 = self.graph.create_node(f'{self._name}_SimulationCellVector_3', CMSO.SimulationCellVector) - self.graph.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_03)) - self.graph.add((simulation_cell_vector_03, CMSO.hasComponent_x, Literal(data[2][0], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_03, CMSO.hasComponent_y, Literal(data[2][1], datatype=XSD.float))) - self.graph.add((simulation_cell_vector_03, CMSO.hasComponent_z, Literal(data[2][2], datatype=XSD.float))) - - simulation_cell_angle = self.graph.create_node(f'{self._name}_SimulationCellAngle', CMSO.SimulationCellAngle) + self.graph.add( + (self.simulation_cell, CMSO.hasVector, simulation_cell_vector_01) + ) + self.graph.add( + ( + simulation_cell_vector_01, + CMSO.hasComponent_x, + Literal(data[0][0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_01, + CMSO.hasComponent_y, + Literal(data[0][1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_01, + CMSO.hasComponent_z, + Literal(data[0][2], datatype=XSD.float), + ) + ) + + simulation_cell_vector_02 = self.graph.create_node( + f"{self._name}_SimulationCellVector_2", CMSO.SimulationCellVector + ) + self.graph.add( + (self.simulation_cell, CMSO.hasVector, simulation_cell_vector_02) + ) + self.graph.add( + ( + simulation_cell_vector_02, + CMSO.hasComponent_x, + Literal(data[1][0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_02, + CMSO.hasComponent_y, + Literal(data[1][1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_02, + CMSO.hasComponent_z, + Literal(data[1][2], datatype=XSD.float), + ) + ) + + simulation_cell_vector_03 = self.graph.create_node( + f"{self._name}_SimulationCellVector_3", CMSO.SimulationCellVector + ) + self.graph.add( + (self.simulation_cell, CMSO.hasVector, simulation_cell_vector_03) + ) + self.graph.add( + ( + simulation_cell_vector_03, + CMSO.hasComponent_x, + Literal(data[2][0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_03, + CMSO.hasComponent_y, + Literal(data[2][1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_vector_03, + CMSO.hasComponent_z, + Literal(data[2][2], datatype=XSD.float), + ) + ) + + simulation_cell_angle = self.graph.create_node( + f"{self._name}_SimulationCellAngle", CMSO.SimulationCellAngle + ) data = self.schema.simulation_cell.angle() self.graph.add((self.simulation_cell, CMSO.hasAngle, simulation_cell_angle)) - self.graph.add((simulation_cell_angle, CMSO.hasAngle_alpha, Literal(data[0], datatype=XSD.float))) - self.graph.add((simulation_cell_angle, CMSO.hasAngle_beta, Literal(data[1], datatype=XSD.float))) - self.graph.add((simulation_cell_angle, CMSO.hasAngle_gamma, Literal(data[2], datatype=XSD.float))) - - + self.graph.add( + ( + simulation_cell_angle, + CMSO.hasAngle_alpha, + Literal(data[0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_angle, + CMSO.hasAngle_beta, + Literal(data[1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + simulation_cell_angle, + CMSO.hasAngle_gamma, + Literal(data[2], datatype=XSD.float), + ) + ) + def _add_crystal_structure(self, targets=None): """ Add a CMSO Crystal Structure @@ -800,28 +1067,35 @@ def _add_crystal_structure(self, targets=None): ------- """ if targets is None: - targets = [self.schema.material.crystal_structure.name(), - self.schema.material.crystal_structure.spacegroup_symbol(), - self.schema.material.crystal_structure.spacegroup_number(), - self.schema.material.crystal_structure.unit_cell.bravais_lattice(), - self.schema.material.crystal_structure.unit_cell.lattice_parameter(), - self.schema.material.crystal_structure.unit_cell.angle() + targets = [ + self.schema.material.crystal_structure.name(), + self.schema.material.crystal_structure.spacegroup_symbol(), + self.schema.material.crystal_structure.spacegroup_number(), + self.schema.material.crystal_structure.unit_cell.bravais_lattice(), + self.schema.material.crystal_structure.unit_cell.lattice_parameter(), + self.schema.material.crystal_structure.unit_cell.angle(), ] valid = self.graph._is_valid(targets) if valid: - crystal_structure = self.graph.create_node(f'{self._name}_CrystalStructure', CMSO.CrystalStructure) + crystal_structure = self.graph.create_node( + f"{self._name}_CrystalStructure", CMSO.CrystalStructure + ) self.graph.add((self.material, CMSO.hasStructure, crystal_structure)) - self.graph.add((crystal_structure, CMSO.hasAltName, - Literal(targets[0], - datatype=XSD.string))) + self.graph.add( + ( + crystal_structure, + CMSO.hasAltName, + Literal(targets[0], datatype=XSD.string), + ) + ) self.crystal_structure = crystal_structure if targets[1] is not None: self._add_space_group(targets[1], targets[2]) - #now see if unit cell needs to be added + # now see if unit cell needs to be added valid = self.graph._is_valid(targets[3:]) if valid: self._add_unit_cell() @@ -830,8 +1104,6 @@ def _add_crystal_structure(self, targets=None): if targets[4] is not None: self._add_lattice_properties(targets[4], targets[5]) - - def _add_space_group(self, spacegroup_symbol, spacegroup_number): """ Add a CMSO Space Group @@ -844,14 +1116,23 @@ def _add_space_group(self, spacegroup_symbol, spacegroup_number): Returns ------- """ - space_group = URIRef(f'{self._name}_SpaceGroup') + space_group = URIRef(f"{self._name}_SpaceGroup") self.graph.add((self.crystal_structure, CMSO.hasSpaceGroup, space_group)) - self.graph.add((space_group, CMSO.hasSpaceGroupSymbol, - Literal(spacegroup_symbol, datatype=XSD.string))) - self.graph.add((space_group, CMSO.hasSpaceGroupNumber, - Literal(spacegroup_number, datatype=XSD.integer))) - - + self.graph.add( + ( + space_group, + CMSO.hasSpaceGroupSymbol, + Literal(spacegroup_symbol, datatype=XSD.string), + ) + ) + self.graph.add( + ( + space_group, + CMSO.hasSpaceGroupNumber, + Literal(spacegroup_number, datatype=XSD.integer), + ) + ) + def _add_unit_cell(self): """ Add a CMSO Unit Cell @@ -865,20 +1146,27 @@ def _add_unit_cell(self): ------- """ - unit_cell = self.graph.create_node(f'{self._name}_UnitCell', CMSO.UnitCell) + unit_cell = self.graph.create_node(f"{self._name}_UnitCell", CMSO.UnitCell) self.graph.add((self.crystal_structure, CMSO.hasUnitCell, unit_cell)) self.unit_cell = unit_cell - - + def _add_bravais_lattice(self, bv): - #add bravais lattice + # add bravais lattice bv = URIRef(bv) - self.graph.add((self.unit_cell, Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasBravaisLattice, bv)) - + self.graph.add( + ( + self.unit_cell, + Namespace( + "http://purls.helmholtz-metadaten.de/cmso/" + ).hasBravaisLattice, + bv, + ) + ) + def _add_lattice_properties(self, lattice_parameter_value, lattice_angle_value): """ Add CMSO lattice properties such as Lattice Parameter, - and its lengths and angles. + and its lengths and angles. Parameters ---------- @@ -888,37 +1176,78 @@ def _add_lattice_properties(self, lattice_parameter_value, lattice_angle_value): Returns ------- """ - lattice_parameter = self.graph.create_node(f'{self._name}_LatticeParameter', CMSO.LatticeParameter) + lattice_parameter = self.graph.create_node( + f"{self._name}_LatticeParameter", CMSO.LatticeParameter + ) self.graph.add((self.unit_cell, CMSO.hasLatticeParameter, lattice_parameter)) - self.graph.add((lattice_parameter, CMSO.hasLength_x, Literal(lattice_parameter_value[0], datatype=XSD.float))) - self.graph.add((lattice_parameter, CMSO.hasLength_y, Literal(lattice_parameter_value[1], datatype=XSD.float))) - self.graph.add((lattice_parameter, CMSO.hasLength_z, Literal(lattice_parameter_value[2], datatype=XSD.float))) - - lattice_angle = self.graph.create_node(f'{self._name}_LatticeAngle', CMSO.LatticeAngle) + self.graph.add( + ( + lattice_parameter, + CMSO.hasLength_x, + Literal(lattice_parameter_value[0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + lattice_parameter, + CMSO.hasLength_y, + Literal(lattice_parameter_value[1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + lattice_parameter, + CMSO.hasLength_z, + Literal(lattice_parameter_value[2], datatype=XSD.float), + ) + ) + + lattice_angle = self.graph.create_node( + f"{self._name}_LatticeAngle", CMSO.LatticeAngle + ) self.graph.add((self.unit_cell, CMSO.hasAngle, lattice_angle)) - self.graph.add((lattice_angle, CMSO.hasAngle_alpha, Literal(lattice_angle_value[0], datatype=XSD.float))) - self.graph.add((lattice_angle, CMSO.hasAngle_beta, Literal(lattice_angle_value[1], datatype=XSD.float))) - self.graph.add((lattice_angle, CMSO.hasAngle_gamma, Literal(lattice_angle_value[2], datatype=XSD.float))) - + self.graph.add( + ( + lattice_angle, + CMSO.hasAngle_alpha, + Literal(lattice_angle_value[0], datatype=XSD.float), + ) + ) + self.graph.add( + ( + lattice_angle, + CMSO.hasAngle_beta, + Literal(lattice_angle_value[1], datatype=XSD.float), + ) + ) + self.graph.add( + ( + lattice_angle, + CMSO.hasAngle_gamma, + Literal(lattice_angle_value[2], datatype=XSD.float), + ) + ) def _save_atom_attributes(self, position_identifier, species_identifier): - #if self.store == 'pyiron': + # if self.store == 'pyiron': # pass - #else: + # else: # #this is the file based store system datadict = { - position_identifier:{ + position_identifier: { "value": self.schema.atom_attribute.position(), - "label": "position", + "label": "position", }, - species_identifier:{ + species_identifier: { "value": self.schema.atom_attribute.species(), - "label": "species", + "label": "species", }, } - outfile = os.path.join(self.graph.structure_store, str(self._name).split(':')[-1]) - json_io.write_file(outfile, datadict) - return os.path.relpath(outfile+'.json') + outfile = os.path.join( + self.graph.structure_store, str(self._name).split(":")[-1] + ) + json_io.write_file(outfile, datadict) + return os.path.relpath(outfile + ".json") def _add_atoms(self): """ @@ -937,27 +1266,63 @@ def _add_atoms(self): Note that for the moment, we will dump the structures in a given folder, maybe this could be input from the Job class directly """ - #now we write out file + # now we write out file position_identifier = str(uuid.uuid4()) species_identifier = str(uuid.uuid4()) outfile = self._save_atom_attributes(position_identifier, species_identifier) if "positions" in self.atoms.keys(): - position = self.graph.create_node(f'{self._name}_Position', CMSO.AtomAttribute) - self.graph.add((self.sample, Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasAttribute, position)) - self.graph.add((position, CMSO.hasName, Literal('Position', datatype=XSD.string))) - self.graph.add((position, CMSO.hasIdentifier, Literal(position_identifier, datatype=XSD.string))) - self.graph.add((position, CMSO.hasPath, Literal(outfile, datatype=XSD.string))) + position = self.graph.create_node( + f"{self._name}_Position", CMSO.AtomAttribute + ) + self.graph.add( + ( + self.sample, + Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasAttribute, + position, + ) + ) + self.graph.add( + (position, CMSO.hasName, Literal("Position", datatype=XSD.string)) + ) + self.graph.add( + ( + position, + CMSO.hasIdentifier, + Literal(position_identifier, datatype=XSD.string), + ) + ) + self.graph.add( + (position, CMSO.hasPath, Literal(outfile, datatype=XSD.string)) + ) if "species" in self.atoms.keys(): - species = self.graph.create_node(f'{self._name}_Species', CMSO.AtomAttribute) - self.graph.add((self.sample, Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasAttribute, species)) - self.graph.add((species, CMSO.hasName, Literal('Species', datatype=XSD.string))) - self.graph.add((species, CMSO.hasIdentifier, Literal(species_identifier, datatype=XSD.string))) - self.graph.add((species, CMSO.hasPath, Literal(outfile, datatype=XSD.string))) - - #if "velocities" in self.sys.atoms.keys(): + species = self.graph.create_node( + f"{self._name}_Species", CMSO.AtomAttribute + ) + self.graph.add( + ( + self.sample, + Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasAttribute, + species, + ) + ) + self.graph.add( + (species, CMSO.hasName, Literal("Species", datatype=XSD.string)) + ) + self.graph.add( + ( + species, + CMSO.hasIdentifier, + Literal(species_identifier, datatype=XSD.string), + ) + ) + self.graph.add( + (species, CMSO.hasPath, Literal(outfile, datatype=XSD.string)) + ) + + # if "velocities" in self.sys.atoms.keys(): # uname = None # if name is not None: # uname = f'{name}_Velocity' @@ -966,19 +1331,18 @@ def _add_atoms(self): # self.add((velocity, RDF.type, CMSO.AtomAttribute)) # self.add((velocity, CMSO.hasName, Literal('Velocity', data_type=XSD.string))) # velocity_identifier = uuid.uuid4() - # self.add((velocity, CMSO.hasIdentifier, Literal(velocity_identifier, datatype=XSD.string))) + # self.add((velocity, CMSO.hasIdentifier, Literal(velocity_identifier, datatype=XSD.string))) - #if "forces" in self.sys.atoms.keys(): + # if "forces" in self.sys.atoms.keys(): # uname = None # if name is not None: - # uname = f'{name}_Force' + # uname = f'{name}_Force' # force = BNode(uname) # self.add((self.sample, CMSO.hasAttribute, force)) # self.add((force, RDF.type, CMSO.AtomAttribute)) # self.add((force, CMSO.hasName, Literal('Force', data_type=XSD.string))) # force_identifier = uuid.uuid4() - # self.add((force, CMSO.hasIdentifier, Literal(force_identifier, datatype=XSD.string))) - + # self.add((force, CMSO.hasIdentifier, Literal(force_identifier, datatype=XSD.string))) def add_vacancy(self, concentration, number=None): """ @@ -998,11 +1362,23 @@ def add_vacancy(self, concentration, number=None): if self.graph is None: return - vacancy = self.graph.create_node(f'{self._name}_Vacancy', PODO.Vacancy) + vacancy = self.graph.create_node(f"{self._name}_Vacancy", PODO.Vacancy) self.graph.add((self.material, CMSO.hasDefect, vacancy)) - self.graph.add((self.simulation_cell, PODO.hasVacancyConcentration, Literal(concentration, datatype=XSD.float))) + self.graph.add( + ( + self.simulation_cell, + PODO.hasVacancyConcentration, + Literal(concentration, datatype=XSD.float), + ) + ) if number is not None: - self.graph.add((self.simulation_cell, PODO.hasNumberOfVacancies, Literal(number, datatype=XSD.integer))) + self.graph.add( + ( + self.simulation_cell, + PODO.hasNumberOfVacancies, + Literal(number, datatype=XSD.integer), + ) + ) def add_gb(self, gb_dict): """ @@ -1020,30 +1396,60 @@ def add_gb(self, gb_dict): ------- """ - #mark that the structure has a defect + # mark that the structure has a defect if self.graph is None: return - + if gb_dict["GBType"] is None: - plane_defect = self.graph.create_node(f'{self._name}_GrainBoundary') - + plane_defect = self.graph.create_node(f"{self._name}_GrainBoundary") + elif gb_dict["GBType"] == "Twist": - plane_defect = self.graph.create_node(f'{self._name}_TwistGrainBoundary', PLDO.TwistGrainBoundary) - + plane_defect = self.graph.create_node( + f"{self._name}_TwistGrainBoundary", PLDO.TwistGrainBoundary + ) + elif gb_dict["GBType"] == "Tilt": - plane_defect = self.graph.create_node(f'{self._name}_TiltGrainBoundary', PLDO.TiltGrainBoundary) - + plane_defect = self.graph.create_node( + f"{self._name}_TiltGrainBoundary", PLDO.TiltGrainBoundary + ) + elif gb_dict["GBType"] == "Symmetric Tilt": - plane_defect = self.graph.create_node(f'{self._name}_SymmetricalTiltGrainBoundary', PLDO.SymmetricalTiltGrainBoundary) - + plane_defect = self.graph.create_node( + f"{self._name}_SymmetricalTiltGrainBoundary", + PLDO.SymmetricalTiltGrainBoundary, + ) + elif gb_dict["GBType"] == "Mixed": - plane_defect = self.graph.create_node(f'{self._name}_MixedGrainBoundary', PLDO.MixedGrainBoundary) - - self.graph.add((self.material, CMSO.hasDefect, plane_defect)) - self.graph.add((plane_defect, PLDO.hasSigmaValue, Literal(gb_dict["sigma"], datatype=XSD.integer))) - self.graph.add((plane_defect, PLDO.hasGBplane, Literal(gb_dict["GBPlane"], - datatype=XSD.string))) - self.graph.add((plane_defect, PLDO.hasRotationAxis, Literal(gb_dict["RotationAxis"], - datatype=XSD.string))) - self.graph.add((plane_defect, PLDO.hasMisorientationAngle, Literal(gb_dict["MisorientationAngle"], datatype=XSD.float))) + plane_defect = self.graph.create_node( + f"{self._name}_MixedGrainBoundary", PLDO.MixedGrainBoundary + ) + self.graph.add((self.material, CMSO.hasDefect, plane_defect)) + self.graph.add( + ( + plane_defect, + PLDO.hasSigmaValue, + Literal(gb_dict["sigma"], datatype=XSD.integer), + ) + ) + self.graph.add( + ( + plane_defect, + PLDO.hasGBplane, + Literal(gb_dict["GBPlane"], datatype=XSD.string), + ) + ) + self.graph.add( + ( + plane_defect, + PLDO.hasRotationAxis, + Literal(gb_dict["RotationAxis"], datatype=XSD.string), + ) + ) + self.graph.add( + ( + plane_defect, + PLDO.hasMisorientationAngle, + Literal(gb_dict["MisorientationAngle"], datatype=XSD.float), + ) + ) diff --git a/atomrdf/visualize.py b/atomrdf/visualize.py index 881ac40..5ec4235 100644 --- a/atomrdf/visualize.py +++ b/atomrdf/visualize.py @@ -8,6 +8,7 @@ def get_title_from_BNode(x): return x.toPython() + def get_string_from_URI(x): """ Extract a presentable string from URI @@ -15,31 +16,32 @@ def get_string_from_URI(x): Also differentiate between fixed notes and URIs, and assign color """ raw = x.toPython() - #first try splitting by # + # first try splitting by # rawsplit = raw.split("#") if len(rawsplit) > 1: return rawsplit[-1], "URIRef" - - #try splitting by = for chebi values - if 'CHEBI' in raw: + + # try splitting by = for chebi values + if "CHEBI" in raw: rawsplit = raw.split("=") rawsplit = rawsplit[-1].split(":") if len(rawsplit) > 1: return ".".join(rawsplit[-2:]), "URIRef" - - if 'sample:' in raw: + + if "sample:" in raw: rawsplit = raw.split(":") if len(rawsplit) > 1: return "_".join(rawsplit), "BNode" - #just a normal url split now + # just a normal url split now rawsplit = raw.split("/") if len(rawsplit) > 1: - return ".".join(rawsplit[-2:]), "URIRef" + return ".".join(rawsplit[-2:]), "URIRef" - #none of the conditions, worked, which means its a hex string + # none of the conditions, worked, which means its a hex string return raw, "BNode" + def parse_object(x): if isinstance(x, BNode): return get_title_from_BNode(x), "BNode" @@ -48,32 +50,38 @@ def parse_object(x): elif isinstance(x, Literal): return str(x.title()), "Literal" + styledict = { "BNode": {"color": "#ffe6ff", "shape": "box", "style": "filled"}, "URIRef": {"color": "#ffffcc", "shape": "box", "style": "filled"}, "Literal": {"color": "#e6ffcc", "shape": "ellipse", "style": "filled"}, } + def _switch_box(box): if box == "box": - return 'rectangle' - #remember that only boxes will be used, circles no! - + return "rectangle" + # remember that only boxes will be used, circles no! + + def _fix_id(string1, istype1): - if istype1 == 'Literal': + if istype1 == "Literal": id1 = str(uuid.uuid4()) else: id1 = string1 return id1 -def visualize_graph(g, - styledict=styledict, - rankdir='TB', - hide_types=False, - workflow_view=False, - size=None, - layout='dot'): - + +def visualize_graph( + g, + styledict=styledict, + rankdir="TB", + hide_types=False, + workflow_view=False, + size=None, + layout="dot", +): + dot = graphviz.Digraph() dot.attr( @@ -83,66 +91,82 @@ def visualize_graph(g, layout=layout, overlap="false", ) - + for k in g: string1, istype1 = parse_object(k[0]) string2, istype2 = parse_object(k[2]) string3, istype = parse_object(k[1]) plot = True - + if workflow_view: - #we collapse sample information - #if cmso.connector is found, only use it is it is cmso.hasCalculated - #all sub sample props, indicated by sample_x_jsjsj will be ignored. + # we collapse sample information + # if cmso.connector is found, only use it is it is cmso.hasCalculated + # all sub sample props, indicated by sample_x_jsjsj will be ignored. green_list = ["hasCalculatedProperty", "wasCalculatedBy", "hasValue"] - ssplit = string3.split('.') - if (len(ssplit) == 2): - if (ssplit[0] == 'cmso') and (ssplit[1] not in green_list): + ssplit = string3.split(".") + if len(ssplit) == 2: + if (ssplit[0] == "cmso") and (ssplit[1] not in green_list): plot = False - if string3 == 'subClassOf': + if string3 == "subClassOf": plot = False - ssplit = string2.split('.') - if string3 == 'type': - if (ssplit[0] == 'cmso') and (ssplit[1] not in ["CalculatedProperty"]): + ssplit = string2.split(".") + if string3 == "type": + if (ssplit[0] == "cmso") and (ssplit[1] not in ["CalculatedProperty"]): plot = False - if (ssplit[0] == 'cmso') and (ssplit[1] in ["AtomicScaleSample"]): - dot.node(string1, label=string1, shape=styledict[istype1]["shape"], - style=styledict[istype1]["style"], - color=styledict[istype1]["color"], - fontsize=styledict[istype1]["fontsize"], - fontname=styledict[istype1]["fontname"]) - plot=False - - if hide_types and (string3 == 'type'): + if (ssplit[0] == "cmso") and (ssplit[1] in ["AtomicScaleSample"]): + dot.node( + string1, + label=string1, + shape=styledict[istype1]["shape"], + style=styledict[istype1]["style"], + color=styledict[istype1]["color"], + fontsize=styledict[istype1]["fontsize"], + fontname=styledict[istype1]["fontname"], + ) + plot = False + + if hide_types and (string3 == "type"): plot = False if not plot: continue - if istype1 == 'Literal': + if istype1 == "Literal": id1 = str(uuid.uuid4()) else: - id1 = string1 - dot.node(id1, label=string1, shape=styledict[istype1]["shape"], - style=styledict[istype1]["style"], - color=styledict[istype1]["color"], - fontsize=styledict[istype1]["fontsize"], - fontname=styledict[istype1]["fontname"]) - - if istype2 == 'Literal': + id1 = string1 + dot.node( + id1, + label=string1, + shape=styledict[istype1]["shape"], + style=styledict[istype1]["style"], + color=styledict[istype1]["color"], + fontsize=styledict[istype1]["fontsize"], + fontname=styledict[istype1]["fontname"], + ) + + if istype2 == "Literal": id2 = str(uuid.uuid4()) else: - id2 = string2 - dot.node(id2, label=string2, shape=styledict[istype2]["shape"], - style=styledict[istype2]["style"], - color=styledict[istype2]["color"], - fontsize=styledict[istype2]["fontsize"], - fontname=styledict[istype2]["fontname"]) - - dot.edge(id1, id2, color=styledict["edgecolor"], - label=string3, + id2 = string2 + dot.node( + id2, + label=string2, + shape=styledict[istype2]["shape"], + style=styledict[istype2]["style"], + color=styledict[istype2]["color"], + fontsize=styledict[istype2]["fontsize"], + fontname=styledict[istype2]["fontname"], + ) + + dot.edge( + id1, + id2, + color=styledict["edgecolor"], + label=string3, fontsize=styledict[istype2]["fontsize"], - fontname=styledict[istype2]["fontname"]) - - return dot \ No newline at end of file + fontname=styledict[istype2]["fontname"], + ) + + return dot diff --git a/atomrdf/workflow/__init__.py b/atomrdf/workflow/__init__.py index f223139..83c8c9d 100644 --- a/atomrdf/workflow/__init__.py +++ b/atomrdf/workflow/__init__.py @@ -1,2 +1 @@ from atomrdf.workflow.pyiron import inform_graph - diff --git a/atomrdf/workflow/pyiron.py b/atomrdf/workflow/pyiron.py index fd0b90a..d412345 100644 --- a/atomrdf/workflow/pyiron.py +++ b/atomrdf/workflow/pyiron.py @@ -1,6 +1,7 @@ """ Wrappers for pyiron jobs """ + import os import numpy as np from functools import partial, update_wrapper @@ -11,117 +12,124 @@ from atomrdf.structure import _make_crystal from atomrdf.structure import System + def _check_if_job_is_valid(job): - valid_jobs = ['Lammps', ] - + valid_jobs = [ + "Lammps", + ] + if not type(job).__name__ in valid_jobs: - raise TypeError('These type of pyiron Job is not currently supported') + raise TypeError("These type of pyiron Job is not currently supported") def _add_structures(job): initial_pyiron_structure = job.structure final_pyiron_structure = job.get_structure(frame=-1) - initial_pyscal_structure = System.read.ase(initial_pyiron_structure) + initial_pyscal_structure = System.read.ase(initial_pyiron_structure) initial_sample_id = None - if 'sample_id' in initial_pyiron_structure.info.keys(): - initial_sample_id = initial_pyiron_structure.info['sample_id'] - #add final structure + if "sample_id" in initial_pyiron_structure.info.keys(): + initial_sample_id = initial_pyiron_structure.info["sample_id"] + # add final structure final_pyscal_structure = System.read.ase(final_pyiron_structure) - - #now we do rthe transfer + + # now we do rthe transfer return initial_pyscal_structure, initial_sample_id, final_pyscal_structure, None def _identify_method(job): job_dict = job.input.to_dict() - input_dict = {job_dict['control_inp/data_dict']['Parameter'][x]:job_dict['control_inp/data_dict']['Value'][x] for x in range(len(job_dict['control_inp/data_dict']['Parameter']))} + input_dict = { + job_dict["control_inp/data_dict"]["Parameter"][x]: job_dict[ + "control_inp/data_dict" + ]["Value"][x] + for x in range(len(job_dict["control_inp/data_dict"]["Parameter"])) + } dof = [] temp = None press = None md_method = None ensemble = None - if 'min_style' in input_dict.keys(): - dof.append('AtomicPosition') - dof.append('CellVolume') - md_method = 'MolecularStatics' - - elif 'nve' in input_dict['fix___ensemble']: - if int(input_dict['run']) == 0: - method = 'static' - md_method = 'MolecularStatics' - ensemble = 'MicrocanonicalEnsemble' - - elif int(input_dict['run']) > 0: - method = 'md_nve' - dof.append('AtomicPosition') - md_method = 'MolecularDynamics' - ensemble = 'MicrocanonicalEnsemble' - - - elif 'nvt' in input_dict['fix___ensemble']: - method = 'md_nvt' - raw = input_dict['fix___ensemble'].split() + if "min_style" in input_dict.keys(): + dof.append("AtomicPosition") + dof.append("CellVolume") + md_method = "MolecularStatics" + + elif "nve" in input_dict["fix___ensemble"]: + if int(input_dict["run"]) == 0: + method = "static" + md_method = "MolecularStatics" + ensemble = "MicrocanonicalEnsemble" + + elif int(input_dict["run"]) > 0: + method = "md_nve" + dof.append("AtomicPosition") + md_method = "MolecularDynamics" + ensemble = "MicrocanonicalEnsemble" + + elif "nvt" in input_dict["fix___ensemble"]: + method = "md_nvt" + raw = input_dict["fix___ensemble"].split() temp = float(raw[3]) - dof.append('AtomicPosition') - md_method = 'MolecularDynamics' - ensemble = 'CanonicalEnsemble' - - elif 'npt' in input_dict['fix___ensemble']: - dof.append('AtomicPosition') - dof.append('CellVolume') - if 'aniso' in input_dict['fix___ensemble']: - method = 'md_npt_aniso' - dof.append('CellShape') + dof.append("AtomicPosition") + md_method = "MolecularDynamics" + ensemble = "CanonicalEnsemble" + + elif "npt" in input_dict["fix___ensemble"]: + dof.append("AtomicPosition") + dof.append("CellVolume") + if "aniso" in input_dict["fix___ensemble"]: + method = "md_npt_aniso" + dof.append("CellShape") else: - method = 'md_npt_iso' - md_method = 'MolecularDynamics' - raw = input_dict['fix___ensemble'].split() + method = "md_npt_iso" + md_method = "MolecularDynamics" + raw = input_dict["fix___ensemble"].split() temp = float(raw[3]) press = float(raw[7]) - ensemble = 'IsothermalisobaricEnsemble' + ensemble = "IsothermalisobaricEnsemble" mdict = {} - mdict['method'] = md_method - mdict['temperature'] = temp - mdict['pressure'] = press - mdict['dof'] = dof - mdict['ensemble'] = ensemble - mdict['id'] = job.id - - #now process potential + mdict["method"] = md_method + mdict["temperature"] = temp + mdict["pressure"] = press + mdict["dof"] = dof + mdict["ensemble"] = ensemble + mdict["id"] = job.id + + # now process potential inpdict = job.input.to_dict() - ps = inpdict['potential_inp/data_dict']['Value'][0] - name = inpdict['potential_inp/potential/Name'] - potstr = job.input.to_dict()['potential_inp/potential/Citations'] + ps = inpdict["potential_inp/data_dict"]["Value"][0] + name = inpdict["potential_inp/potential/Name"] + potstr = job.input.to_dict()["potential_inp/potential/Citations"] potdict = ast.literal_eval(potstr[1:-1]) url = None - if 'url' in potdict[list(potdict.keys())[0]].keys(): - url = potdict[list(potdict.keys())[0]]['url'] + if "url" in potdict[list(potdict.keys())[0]].keys(): + url = potdict[list(potdict.keys())[0]]["url"] - mdict['potential'] = {} - mdict['potential']['type'] = ps - mdict['potential']['label'] = name + mdict["potential"] = {} + mdict["potential"]["type"] = ps + mdict["potential"]["label"] = name if url is not None: - mdict['potential']['uri'] = url + mdict["potential"]["uri"] = url else: - mdict['potential']['uri'] = name + mdict["potential"]["uri"] = name - - mdict['workflow_manager'] = {} - mdict['workflow_manager']['uri'] = "http://demo.fiz-karlsruhe.de/matwerk/E457491" - mdict['workflow_manager']['label'] = "pyiron" - #and finally code details + mdict["workflow_manager"] = {} + mdict["workflow_manager"]["uri"] = "http://demo.fiz-karlsruhe.de/matwerk/E457491" + mdict["workflow_manager"]["label"] = "pyiron" + # and finally code details - - software = {'uri':"http://demo.fiz-karlsruhe.de/matwerk/E447986", - 'label':'LAMMPS'} - mdict['software'] = [software] + software = { + "uri": "http://demo.fiz-karlsruhe.de/matwerk/E447986", + "label": "LAMMPS", + } + mdict["software"] = [software] - #finally add calculated quantities + # finally add calculated quantities quantdict = extract_calculated_quantities(job) - mdict['outputs'] = quantdict + mdict["outputs"] = quantdict return mdict @@ -129,18 +137,25 @@ def extract_calculated_quantities(job): aen = np.mean(job.output.energy_tot) avol = np.mean(job.output.volume) outputs = [] - outputs.append({'label': 'TotalEnergy', - 'value': np.round(aen, decimals=4), - 'unit': 'EV', - 'associate_to_sample': True}) - outputs.append({'label': 'TotalVolume', - 'value': np.round(avol, decimals=4), - 'unit': 'ANGSTROM3', - 'associate_to_sample': True}) + outputs.append( + { + "label": "TotalEnergy", + "value": np.round(aen, decimals=4), + "unit": "EV", + "associate_to_sample": True, + } + ) + outputs.append( + { + "label": "TotalVolume", + "value": np.round(avol, decimals=4), + "unit": "ANGSTROM3", + "associate_to_sample": True, + } + ) return outputs - def inform_graph(pr, kg): """ Update project to add extra creator functions @@ -148,71 +163,79 @@ def inform_graph(pr, kg): try: from pyiron_base import Creator, PyironFactory - from pyiron_atomistics.atomistics.structure.atoms import ase_to_pyiron, pyiron_to_ase + from pyiron_atomistics.atomistics.structure.atoms import ( + ase_to_pyiron, + pyiron_to_ase, + ) import pyiron_atomistics.atomistics.structure.factory as sf except ImportError: - raise ImportError('Please install pyiron_base and pyiron_atomistics') + raise ImportError("Please install pyiron_base and pyiron_atomistics") class AnnotatedStructureFactory: def __init__(self, graph): self._graph = graph - def bulk(self, + def bulk( + self, element, - repetitions=None, + repetitions=None, crystalstructure=None, a=None, covera=None, cubic=True, - graph=None): + graph=None, + ): if crystalstructure is None: - crystalstructure = element_dict[element]['structure'] + crystalstructure = element_dict[element]["structure"] if a is None: - a = element_dict[element]['lattice_constant'] - - struct = _make_crystal(crystalstructure, + a = element_dict[element]["lattice_constant"] + + struct = _make_crystal( + crystalstructure, repetitions=repetitions, lattice_constant=a, - ca_ratio = covera, - element = element, - primitive = not cubic, + ca_ratio=covera, + element=element, + primitive=not cubic, graph=self._graph, - ) - + ) + ase_structure = struct.write.ase() pyiron_structure = ase_to_pyiron(ase_structure) - pyiron_structure.info['sample_id'] = struct.sample + pyiron_structure.info["sample_id"] = struct.sample return pyiron_structure - def grain_boundary(self, + def grain_boundary( + self, element, axis, sigma, gb_plane, - repetitions = (1,1,1), + repetitions=(1, 1, 1), crystalstructure=None, a=1, overlap=0.0, graph=None, - ): + ): - struct = self._graph._annotated_make_grain_boundary(axis, + struct = self._graph._annotated_make_grain_boundary( + axis, sigma, gb_plane, - structure = crystalstructure, + structure=crystalstructure, element=element, lattice_constant=a, repetitions=repetitions, overlap=overlap, - graph=self._graph) + graph=self._graph, + ) ase_structure = struct.write.ase() pyiron_structure = ase_to_pyiron(ase_structure) - pyiron_structure.info['sample_id'] = struct.sample + pyiron_structure.info["sample_id"] = struct.sample return pyiron_structure - class StructureFactory(sf.StructureFactory): def __init__(self, graph): super().__init__() @@ -222,7 +245,6 @@ def __init__(self, graph): def annotated_structure(self): return self._annotated_structure - class StructureCreator(Creator): def __init__(self, project): super().__init__(project) @@ -231,7 +253,6 @@ def __init__(self, project): @property def structure(self): return self._structure - + pr.graph = kg pr._creator = StructureCreator(pr) - \ No newline at end of file diff --git a/atomrdf/workflow/workflow.py b/atomrdf/workflow/workflow.py index 17014e6..4c2d0da 100644 --- a/atomrdf/workflow/workflow.py +++ b/atomrdf/workflow/workflow.py @@ -25,16 +25,15 @@ from atomrdf.structure import System -#Move imports to another file +# Move imports to another file from atomrdf.namespace import PROV, CMSO, PODO, ASMO -#custom imports as needed +# custom imports as needed import atomrdf.workflow.pyiron as pi class Workflow: - def __init__(self, kg, - environment='pyiron'): + def __init__(self, kg, environment="pyiron"): """ Initialize the workflow environment @@ -46,28 +45,30 @@ def __init__(self, kg, """ self.kg = kg - if environment == 'pyiron': + if environment == "pyiron": self.wenv = pi else: - raise ValueError('unknown workflow environment') + raise ValueError("unknown workflow environment") def _prepare_job(self, workflow_object): self.wenv._check_if_job_is_valid(workflow_object) - parent_structure, parent_sample, structure, sample = self.wenv._add_structures(workflow_object) + parent_structure, parent_sample, structure, sample = self.wenv._add_structures( + workflow_object + ) method_dict = self.wenv._identify_method(workflow_object) if (structure is None) and (sample is None): - raise ValueError('Either structure or sample should be specified') + raise ValueError("Either structure or sample should be specified") if sample is None: - #its not added to graph yet + # its not added to graph yet structure.graph = self.kg structure.to_graph() sample = structure.sample - + if parent_sample is None: - #its not added to graph yet + # its not added to graph yet if parent_structure is not None: parent_structure.graph = self.kg parent_structure.to_graph() @@ -76,24 +77,34 @@ def _prepare_job(self, workflow_object): self.structure = structure self.sample = sample self.mdict = method_dict - self.main_id = method_dict['id'] + self.main_id = method_dict["id"] self.parent_sample = parent_sample - def _get_lattice_properties(self, ): + def _get_lattice_properties( + self, + ): if self.parent_sample is None: return - - material = list([k[2] for k in self.kg.triples((self.parent_sample, CMSO.hasMaterial, None))])[0] + + material = list( + [ + k[2] + for k in self.kg.triples((self.parent_sample, CMSO.hasMaterial, None)) + ] + )[0] crystal_structure = self.kg.value(material, CMSO.hasStructure) - + altname = self.kg.value(crystal_structure, CMSO.hasAltName) - + space_group = self.kg.value(crystal_structure, CMSO.hasSpaceGroup) space_group_symbol = self.kg.value(space_group, CMSO.hasSpaceGroupSymbol) space_group_number = self.kg.value(space_group, CMSO.hasSpaceGroupNumber) unit_cell = self.kg.value(crystal_structure, CMSO.hasUnitCell) - blattice = self.kg.value(unit_cell, Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasBravaisLattice) + blattice = self.kg.value( + unit_cell, + Namespace("http://purls.helmholtz-metadaten.de/cmso/").hasBravaisLattice, + ) lattice_parameter = self.kg.value(unit_cell, CMSO.hasLatticeParameter) lattice_parameter_x = self.kg.value(lattice_parameter, CMSO.hasLength_x) @@ -105,43 +116,62 @@ def _get_lattice_properties(self, ): lattice_angle_y = self.kg.value(lattice_angle, CMSO.hasAngle_beta) lattice_angle_z = self.kg.value(lattice_angle, CMSO.hasAngle_gamma) - targets = [altname, space_group_symbol, space_group_number, blattice, - [lattice_parameter_x, lattice_parameter_y, lattice_parameter_z], - [lattice_angle_x, lattice_angle_y, lattice_angle_z]] + targets = [ + altname, + space_group_symbol, + space_group_number, + blattice, + [lattice_parameter_x, lattice_parameter_y, lattice_parameter_z], + [lattice_angle_x, lattice_angle_y, lattice_angle_z], + ] self.structure._add_crystal_structure(targets=targets) - - def _add_inherited_properties(self, ): - #Here we need to add inherited info: CalculatedProperties will be lost - #Defects will be inherited + def _add_inherited_properties( + self, + ): + # Here we need to add inherited info: CalculatedProperties will be lost + # Defects will be inherited if self.parent_sample is None: return - parent_material = list([k[2] for k in self.kg.triples((self.parent_sample, CMSO.hasMaterial, None))])[0] - parent_defects = list([x[2] for x in self.kg.triples((parent_material, CMSO.hasDefect, None))]) - #now for each defect we copy add this to the final sample - material = list([k[2] for k in self.kg.triples((self.sample, CMSO.hasMaterial, None))])[0] + parent_material = list( + [ + k[2] + for k in self.kg.triples((self.parent_sample, CMSO.hasMaterial, None)) + ] + )[0] + parent_defects = list( + [x[2] for x in self.kg.triples((parent_material, CMSO.hasDefect, None))] + ) + # now for each defect we copy add this to the final sample + material = list( + [k[2] for k in self.kg.triples((self.sample, CMSO.hasMaterial, None))] + )[0] for defect in parent_defects: new_defect = URIRef(defect.toPython()) self.kg.add((material, CMSO.hasDefect, new_defect)) - #now fetch all defect based info + # now fetch all defect based info for triple in self.kg.triples((defect, None, None)): self.kg.add((new_defect, triple[1], triple[2])) - #now add the special props for vacancy + # now add the special props for vacancy parent_simcell = self.kg.value(self.sample, CMSO.hasSimulationCell) - simcell = self.kg.value(self.parent_sample, CMSO.hasSimulationCell) - - for triple in self.kg.triples((parent_simcell, PODO.hasVacancyConcentration, None)): + simcell = self.kg.value(self.parent_sample, CMSO.hasSimulationCell) + + for triple in self.kg.triples( + (parent_simcell, PODO.hasVacancyConcentration, None) + ): self.kg.add((simcell, triple[1], triple[2])) - for triple in self.kg.triples((parent_simcell, PODO.hasNumberOfVacancies, None)): + for triple in self.kg.triples( + (parent_simcell, PODO.hasNumberOfVacancies, None) + ): self.kg.add((simcell, triple[1], triple[2])) - - - def add_structural_relation(self, ): + def add_structural_relation( + self, + ): self.kg.add((self.sample, RDF.type, PROV.Entity)) if self.parent_sample is not None: self.kg.add((self.parent_sample, RDF.type, PROV.Entity)) @@ -149,8 +179,9 @@ def add_structural_relation(self, ): self._get_lattice_properties() self._add_inherited_properties() - - def add_method(self, ): + def add_method( + self, + ): """ mdict ----- @@ -159,127 +190,186 @@ def add_method(self, ): """ if self.mdict is None: return - - #add activity - #---------------------------------------------------------- - activity = URIRef(f'activity_{self.main_id}') + + # add activity + # ---------------------------------------------------------- + activity = URIRef(f"activity_{self.main_id}") self.kg.add((activity, RDF.type, PROV.Activity)) - #add method - #---------------------------------------------------------- - method = URIRef(f'method_{self.main_id}') - if self.mdict['method'] == 'MolecularStatics': + # add method + # ---------------------------------------------------------- + method = URIRef(f"method_{self.main_id}") + if self.mdict["method"] == "MolecularStatics": self.kg.add((method, RDF.type, ASMO.MolecularStatics)) - elif self.mdict['method'] == 'MolecularDynamics': + elif self.mdict["method"] == "MolecularDynamics": self.kg.add((method, RDF.type, ASMO.MolecularDynamics)) - elif self.mdict['method'] == 'DensityFunctionalTheory': + elif self.mdict["method"] == "DensityFunctionalTheory": self.kg.add((method, RDF.type, ASMO.DensityFunctionalTheory)) self.kg.add((activity, ASMO.hasComputationalMethod, method)) - #choose if its rigid energy or structure optimisation - #---------------------------------------------------------- - if len(self.mdict['dof']) == 0: - self.kg.add((activity, RDF.type, Namespace("http://purls.helmholtz-metadaten.de/asmo/").RigidEnergyCalculation)) + # choose if its rigid energy or structure optimisation + # ---------------------------------------------------------- + if len(self.mdict["dof"]) == 0: + self.kg.add( + ( + activity, + RDF.type, + Namespace( + "http://purls.helmholtz-metadaten.de/asmo/" + ).RigidEnergyCalculation, + ) + ) else: self.kg.add((activity, RDF.type, ASMO.StructureOptimization)) - #add DOFs - for dof in self.mdict['dof']: + # add DOFs + for dof in self.mdict["dof"]: self.kg.add((activity, ASMO.hasRelaxationDOF, getattr(ASMO, dof))) - #add method specific items - if self.mdict['method'] in ['MolecularStatics', 'MolecularDynamics']: + # add method specific items + if self.mdict["method"] in ["MolecularStatics", "MolecularDynamics"]: self._add_md(method, activity) - elif self.mdict['method'] in ['DensityFunctionalTheory']: + elif self.mdict["method"] in ["DensityFunctionalTheory"]: self._add_dft(method, activity) - #add that structure was generated + # add that structure was generated self.kg.add((self.sample, PROV.wasGeneratedBy, activity)) self._add_inputs(activity) self._add_outputs(activity) - self._add_software(method) - + self._add_software(method) def to_graph(self, workflow_object): self._prepare_job(workflow_object) self.add_structural_relation() self.add_method() - def _add_outputs(self, activity): - if 'outputs' in self.mdict.keys(): - for out in self.mdict['outputs']: - prop = self.kg.create_node(f'{self.main_id}_{out["label"]}', CMSO.CalculatedProperty) - self.kg.add((prop, RDFS.label, Literal(out['label']))) + if "outputs" in self.mdict.keys(): + for out in self.mdict["outputs"]: + prop = self.kg.create_node( + f'{self.main_id}_{out["label"]}', CMSO.CalculatedProperty + ) + self.kg.add((prop, RDFS.label, Literal(out["label"]))) self.kg.add((prop, ASMO.hasValue, Literal(out["value"]))) if "unit" in out.keys(): - unit = out['unit'] - self.kg.add((prop, ASMO.hasUnit, URIRef(f'http://qudt.org/vocab/unit/{unit}'))) + unit = out["unit"] + self.kg.add( + ( + prop, + ASMO.hasUnit, + URIRef(f"http://qudt.org/vocab/unit/{unit}"), + ) + ) self.kg.add((prop, ASMO.wasCalculatedBy, activity)) - if out['associate_to_sample']: + if out["associate_to_sample"]: self.kg.add((self.sample, CMSO.hasCalculatedProperty, prop)) def _add_inputs(self, activity): - if 'inputs' in self.mdict.keys(): - for inp in self.mdict['inputs']: - prop = self.kg.create_node(f'{self.main_id}_{inp["label"]}', ASMO.InputParameter) - self.kg.add((prop, RDFS.label, Literal(inp['label']))) + if "inputs" in self.mdict.keys(): + for inp in self.mdict["inputs"]: + prop = self.kg.create_node( + f'{self.main_id}_{inp["label"]}', ASMO.InputParameter + ) + self.kg.add((prop, RDFS.label, Literal(inp["label"]))) self.kg.add((prop, ASMO.hasValue, Literal(inp["value"]))) if "unit" in inp.keys(): - unit = inp['unit'] - self.kg.add((prop, ASMO.hasUnit, URIRef(f'http://qudt.org/vocab/unit/{unit}'))) + unit = inp["unit"] + self.kg.add( + ( + prop, + ASMO.hasUnit, + URIRef(f"http://qudt.org/vocab/unit/{unit}"), + ) + ) self.kg.add((activity, ASMO.hasInputParameter, prop)) def _add_software(self, method): - #finally add software + # finally add software wfagent = None - if 'workflow_manager' in self.mdict.keys(): - wfagent = self.kg.create_node(self.mdict["workflow_manager"]['uri'], PROV.SoftwareAgent) - self.kg.add((wfagent, RDFS.label, Literal(self.mdict["workflow_manager"]['label']))) + if "workflow_manager" in self.mdict.keys(): + wfagent = self.kg.create_node( + self.mdict["workflow_manager"]["uri"], PROV.SoftwareAgent + ) + self.kg.add( + (wfagent, RDFS.label, Literal(self.mdict["workflow_manager"]["label"])) + ) self.kg.add((method, PROV.wasAssociatedWith, wfagent)) - for software in self.mdict['software']: - agent = self.kg.create_node(software['uri'], PROV.SoftwareAgent) - self.kg.add((agent, RDFS.label, Literal(software['label']))) + for software in self.mdict["software"]: + agent = self.kg.create_node(software["uri"], PROV.SoftwareAgent) + self.kg.add((agent, RDFS.label, Literal(software["label"]))) if wfagent is not None: self.kg.add((wfagent, PROV.actedOnBehalfOf, agent)) else: self.kg.add((method, PROV.wasAssociatedWith, agent)) - def _add_md(self, method, activity): - self.kg.add((method, ASMO.hasStatisticalEnsemble, getattr(ASMO, self.mdict['ensemble']))) - - #add temperature if needed - if self.mdict['temperature'] is not None: - temperature = self.kg.create_node(f'temperature_{self.main_id}', ASMO.InputParameter) - self.kg.add((temperature, RDFS.label, Literal('temperature', datatype=XSD.string))) + self.kg.add( + (method, ASMO.hasStatisticalEnsemble, getattr(ASMO, self.mdict["ensemble"])) + ) + + # add temperature if needed + if self.mdict["temperature"] is not None: + temperature = self.kg.create_node( + f"temperature_{self.main_id}", ASMO.InputParameter + ) + self.kg.add( + (temperature, RDFS.label, Literal("temperature", datatype=XSD.string)) + ) self.kg.add((activity, ASMO.hasInputParameter, temperature)) - self.kg.add((temperature, ASMO.hasValue, Literal(self.mdict['temperature'], datatype=XSD.float))) - self.kg.add((temperature, ASMO.hasUnit, URIRef('http://qudt.org/vocab/unit/K'))) - - if self.mdict['pressure'] is not None: - pressure = self.kg.create_node(f'pressure_{self.main_id}', ASMO.InputParameter) - self.kg.add((pressure, RDFS.label, Literal('pressure', datatype=XSD.string))) + self.kg.add( + ( + temperature, + ASMO.hasValue, + Literal(self.mdict["temperature"], datatype=XSD.float), + ) + ) + self.kg.add( + (temperature, ASMO.hasUnit, URIRef("http://qudt.org/vocab/unit/K")) + ) + + if self.mdict["pressure"] is not None: + pressure = self.kg.create_node( + f"pressure_{self.main_id}", ASMO.InputParameter + ) + self.kg.add( + (pressure, RDFS.label, Literal("pressure", datatype=XSD.string)) + ) self.kg.add((activity, ASMO.hasInputParameter, pressure)) - self.kg.add((pressure, ASMO.hasValue, Literal(self.mdict['pressure'], datatype=XSD.float))) - self.kg.add((pressure, ASMO.hasUnit, URIRef('http://qudt.org/vocab/unit/GigaPA'))) - - #potentials need to be mapped - potential = URIRef(f'potential_{self.main_id}') - if 'meam' in self.mdict['potential']['type']: + self.kg.add( + ( + pressure, + ASMO.hasValue, + Literal(self.mdict["pressure"], datatype=XSD.float), + ) + ) + self.kg.add( + (pressure, ASMO.hasUnit, URIRef("http://qudt.org/vocab/unit/GigaPA")) + ) + + # potentials need to be mapped + potential = URIRef(f"potential_{self.main_id}") + if "meam" in self.mdict["potential"]["type"]: self.kg.add((potential, RDF.type, ASMO.ModifiedEmbeddedAtomModel)) - elif 'eam' in self.mdict['potential']['type']: + elif "eam" in self.mdict["potential"]["type"]: self.kg.add((potential, RDF.type, ASMO.EmbeddedAtomModel)) - elif 'lj' in self.mdict['potential']['type']: + elif "lj" in self.mdict["potential"]["type"]: self.kg.add((potential, RDF.type, ASMO.LennardJonesPotential)) - elif 'ace' in self.mdict['potential']['type']: + elif "ace" in self.mdict["potential"]["type"]: self.kg.add((potential, RDF.type, ASMO.MachineLearningPotential)) else: self.kg.add((potential, RDF.type, ASMO.InteratomicPotential)) - if 'uri' in self.mdict['potential'].keys(): - self.kg.add((potential, CMSO.hasReference, Literal(self.mdict['potential']['uri'], datatype=XSD.string))) - if 'label' in self.mdict['potential'].keys(): - self.kg.add((potential, RDFS.label, Literal(self.mdict['potential']['label']))) + if "uri" in self.mdict["potential"].keys(): + self.kg.add( + ( + potential, + CMSO.hasReference, + Literal(self.mdict["potential"]["uri"], datatype=XSD.string), + ) + ) + if "label" in self.mdict["potential"].keys(): + self.kg.add( + (potential, RDFS.label, Literal(self.mdict["potential"]["label"])) + ) self.kg.add((method, ASMO.hasInteratomicPotential, potential))