From 6c5c13a216802e313cb8664a28671b02a6c618a5 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 15:04:33 +0100 Subject: [PATCH 01/15] add identifiers --- pyscal_rdf/graph.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index f68fb4a..06b61c6 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -14,6 +14,7 @@ import copy import pandas as pd import yaml +import uuid from pyscal_rdf.visualize import visualize_graph from pyscal_rdf.network.network import OntologyNetwork @@ -612,7 +613,8 @@ def add_atoms(self, name=None): self.add((self.sample, CMSO.hasAttribute, position)) self.add((position, RDF.type, CMSO.AtomAttribute)) self.add((position, CMSO.hasName, Literal('Position', data_type=XSD.string))) - self.add(()) + position_identifier = uuid.uuid4() + self.add((position, CMSO.hasIdentifier, Literal(position_identifier, datatype=XSD.string))) if "species" in self.sys.atoms.keys(): uname = None @@ -621,8 +623,9 @@ def add_atoms(self, name=None): species = BNode(uname) self.add((self.sample, CMSO.hasAttribute, species)) self.add((species, RDF.type, CMSO.AtomAttribute)) - self.add((species, CMSO.hasName, Literal('Species', data_type=XSD.string)) - self.add(()) + self.add((species, CMSO.hasName, Literal('Species', data_type=XSD.string))) + species_identifier = uuid.uuid4() + self.add((species, CMSO.hasIdentifier, Literal(species_identifier, datatype=XSD.string))) if "velocities" in self.sys.atoms.keys(): uname = None @@ -631,8 +634,9 @@ def add_atoms(self, name=None): velocity = BNode(uname) self.add((self.sample, CMSO.hasAttribute, velocity)) self.add((velocity, RDF.type, CMSO.AtomAttribute)) - self.add((velocity, CMSO.hasName, Literal('Velocity', data_type=XSD.string)) - self.add(()) + self.add((velocity, CMSO.hasName, Literal('Velocity', data_type=XSD.string))) + velocity_identifier = uuid.uuid4() + self.add((velocity, CMSO.hasIdentifier, Literal(velocity_identifier, datatype=XSD.string))) if "forces" in self.sys.atoms.keys(): uname = None @@ -641,8 +645,9 @@ def add_atoms(self, name=None): force = BNode(uname) self.add((self.sample, CMSO.hasAttribute, force)) self.add((force, RDF.type, CMSO.AtomAttribute)) - self.add((force, CMSO.hasName, Literal('Force', data_type=XSD.string)) - self.add(()) + self.add((force, CMSO.hasName, Literal('Force', data_type=XSD.string))) + force_identifier = uuid.uuid4() + self.add((force, CMSO.hasIdentifier, Literal(force_identifier, datatype=XSD.string))) From 8e35a46e55b79c856479c86bf1756e9b6c5b3902 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 15:05:56 +0100 Subject: [PATCH 02/15] add unique identifier --- pyscal_rdf/graph.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 06b61c6..5fc247a 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -605,6 +605,9 @@ def add_atoms(self, name=None): Note that for the moment, we will dump the structures in a given folder, maybe this could be input from the Job class directly """ + #start a path to store the data + #samples are BNodes, so names may not be unique, therefore we create one + if "positions" in self.sys.atoms.keys(): uname = None if name is not None: From bc5084e57bdfaca46ff0b0a154fa6396750edb6e Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 15:11:10 +0100 Subject: [PATCH 03/15] update graph --- pyscal_rdf/graph.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 5fc247a..88b9f89 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -271,7 +271,9 @@ def add(self, triple): if str(triple[2].toPython()) != 'None': self.graph.add(triple) - def add_structure_to_graph(self, structure, names=True, name_index=None, format=None): + def add_structure_to_graph(self, structure, names=True, + name_index=None, + format=None): """ Add a given :py:class:`pyscal.core.System` to the Graph object @@ -286,6 +288,9 @@ def add_structure_to_graph(self, structure, names=True, name_index=None, format= Returns ------- None + + Notes + ----- """ self.process_structure(structure, format=format) #now add to graph @@ -607,7 +612,7 @@ def add_atoms(self, name=None): """ #start a path to store the data #samples are BNodes, so names may not be unique, therefore we create one - + if "positions" in self.sys.atoms.keys(): uname = None if name is not None: From b02e4dc5336c39783469f3632b5a418677b363eb Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 15:25:04 +0100 Subject: [PATCH 04/15] update graph for uriref prep --- pyscal_rdf/graph.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 88b9f89..de01457 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -271,7 +271,9 @@ def add(self, triple): if str(triple[2].toPython()) != 'None': self.graph.add(triple) - def add_structure_to_graph(self, structure, names=True, + def add_structure_to_graph(self, + structure, + names=True, name_index=None, format=None): """ @@ -291,17 +293,31 @@ def add_structure_to_graph(self, structure, names=True, Notes ----- + BNodes, or relational nodes will be avoided as much as possible so that merging of datasets would be possible. + Instead URIref containers will be made use of. This makes the `names` and `name_index` parameters crucial. + `names` parameter means that legible names starting with the string `Sample_x` would be used. `x` would ensure + that there is conflict with the current database. However, they do not ensure there is no conflicts when various + graphs are merged together. Hence this value is recommended only for simple, demonstration cases. + + If `names` are False, unique ids are generated which would be id of the sample. These ids use the python `uuid` module + and therefore ensures that the names are always unique. """ + self.process_structure(structure, format=format) + #now add to graph if name_index is None: name_index = self.n_samples + 1 + self.create_graph(names=names, name_index=name_index) structure.sample = self.sample - structure._atom_ids = copy.copy(self._atom_ids) + #structure._atom_ids = copy.copy(self._atom_ids) structure.graph = self - def create_graph(self, names=False, name_index="1"): + def _generate_names(self, names=False, name_index=1): + pass + + def create_graph(self, names=False, name_index=1): """ Create the RDF Graph from the data stored @@ -330,6 +346,7 @@ def create_graph(self, names=False, name_index="1"): None, None, None, None, None, None] + self.add_sample(name=name_list[0]) self.add_material(name=name_list[1]) self.add_chemical_composition(name=name_list[2]) From 788e1278e95f618ff41e3357eb952d75d0bebda4 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 15:27:40 +0100 Subject: [PATCH 05/15] add skeleton for schema --- pyscal_rdf/rdfsystem.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyscal_rdf/rdfsystem.py b/pyscal_rdf/rdfsystem.py index 35262da..9630289 100644 --- a/pyscal_rdf/rdfsystem.py +++ b/pyscal_rdf/rdfsystem.py @@ -1,5 +1,7 @@ import numpy as np import pyscal3.core as pc +from pyscal3.atoms import AttrSetter + from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS CMSO = Namespace("https://purls.helmholtz-metadaten.de/cmso/") @@ -25,6 +27,13 @@ def __init__(self, filename = None, if source is not None: self.__dict__.update(source.__dict__) + #assign attributes + self.schema = AttrSetter() + mapdict = {} + + self.schema._add_attribute(mapdict) + + def __delitem__(self, val): if isinstance(val, int): val = [val] From f74ae596eefb34aea6428656992f7042a425268a Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 16:35:01 +0100 Subject: [PATCH 06/15] overhaul properties --- pyscal_rdf/properties.py | 165 +++++++++++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 43 deletions(-) diff --git a/pyscal_rdf/properties.py b/pyscal_rdf/properties.py index 4ebd1ee..c7ab86a 100644 --- a/pyscal_rdf/properties.py +++ b/pyscal_rdf/properties.py @@ -1,7 +1,122 @@ import numpy as np import spglib -def get_angle(vec1, vec2): +# DATADICT properties +#------------------------------------------ +bravais_lattice_dict = { + "l12": "https://www.wikidata.org/wiki/Q3006714", + "b2": "https://www.wikidata.org/wiki/Q851536", + "diamond": "https://www.wikidata.org/wiki/Q3006714", + "hcp": "https://www.wikidata.org/wiki/Q663314", + "a15": "a15", + "bcc": "https://www.wikidata.org/wiki/Q851536", + "fcc": "https://www.wikidata.org/wiki/Q3006714", +} + +# SIMCELL properties +#-------------------------------------------- +def get_chemical_composition(system): + return system.composition + +def get_cell_volume(system): + return system.volume + +def get_number_of_atoms(system): + return system.natoms + +def get_simulation_cell_length(system): + return system.box_dimensions + +def get_simulation_cell_vector(system): + return system.box + +def get_simulation_cell_angle(system): + return [_get_angle(system.box[0], system.box[1]), + _get_angle(system.box[1], system.box[2]), + _get_angle(system.box[2], system.box[0])] + +# LATTICE properties +#-------------------------------------------- + +def get_lattice_angle(system): + if system._structure_dict is None: + return None + + return [_get_angle(system._structure_dict["box"][0], system._structure_dict["box"][1]), + _get_angle(system._structure_dict["box"][1], system._structure_dict["box"][2]), + _get_angle(system._structure_dict["box"][2], system._structure_dict["box"][0])] + +def get_lattice_parameter(system): + if system.atoms._lattice_constant is None: + return [None, None, None] + else: + if system._structure_dict is not None: + return [np.linalg.norm(system._structure_dict["box"][0])*system.atoms._lattice_constant, + np.linalg.norm(system._structure_dict["box"][1])*system.atoms._lattice_constant, + np.linalg.norm(system._structure_dict["box"][2])*system.atoms._lattice_constant] + else: + return [system.atoms._lattice_constant, + system.atoms._lattice_constant, + system.atoms._lattice_constant] + +def get_crystal_structure_name(system): + if system._structure_dict is None: + return None + return system.atoms._lattice + +def get_bravais_lattice(system): + if system._structure_dict is None: + return None + if system.atoms._lattice in bravais_lattice_dict.keys(): + return bravais_lattice_dict[system.atoms._lattice] + return None + +def get_basis_positions(system): + if system._structure_dict is None: + return None + return system._structure_dict["positions"] + +def get_basis_occupancy(system): + if system._structure_dict is None: + return None + occ_numbers = system._structure_dict['species'] + tdict = system.atoms._type_dict + vals = [val for key, val in tdict.items()] + + if vals[0] is not None: + occ_numbers = [tdict[x] for x in occ_numbers] + return occ_numbers + +def get_lattice_vectors(system): + if system._structure_dict is None: + return None + return system._structure_dict["box"] + +def get_spacegroup_symbol(system): + if system._structure_dict is None: + return None + results = _get_symmetry_dict(system) + return results["international"] + +def get_spacegroup_number(system): + if system._structure_dict is None: + return None + results = _get_symmetry_dict(system) + return results["number"] + +# ATOM attributes +#-------------------------------------------- +def get_position(system): + return system.atoms.position + +def get_species(system): + return system.atoms.species + + + +# SUPPORT functions +#-------------------------------------------- +def _get_angle(vec1, vec2): """ Get angle between two vectors in degrees @@ -25,50 +140,14 @@ def get_angle(vec1, vec2): """ return np.round(np.arccos(np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2)))*180/np.pi, decimals=2) -def get_coordination(sys): - sys.find.neighbors(method="cutoff") - coordination = [len(x) for x in sys.atoms.neighbors.index] - return coordination - -def get_lattice_vector(sys, cartesian=False): - box = sys.box - return box - -def get_bravais_lattice(sys): - lattice = sys.atoms._lattice - if lattice == "l12": - lattice = "https://www.wikidata.org/wiki/Q3006714" - elif lattice == "b2": - lattice = "https://www.wikidata.org/wiki/Q851536" - elif lattice == "diamond": - lattice = "https://www.wikidata.org/wiki/Q3006714" - elif lattice == "hcp": - lattice = "https://www.wikidata.org/wiki/Q663314" - elif lattice == "a15": - lattice = "a15" - elif lattice == "bcc": - lattice = "https://www.wikidata.org/wiki/Q851536" - elif lattice == "fcc": - lattice = "https://www.wikidata.org/wiki/Q3006714" - return lattice - -def get_space_group(sys): - box = get_lattice_vector(sys) - direct_coordinates = sys._structure_dict['positions'] - atom_types = sys._structure_dict['species'] +def _get_symmetry_dict(system): + box = get_lattice_vector(system) + direct_coordinates = get_basis_positions(system) + atom_types = system._structure_dict['species'] + results = spglib.get_symmetry_dataset((box, direct_coordinates, atom_types)) - return results["international"], results["number"] - -def get_basis(sys): - occ_numbers = sys._structure_dict['species'] - tdict = sys.atoms._type_dict - vals = [val for key, val in tdict.items()] - - if vals[0] is not None: - occ_numbers = [tdict[x] for x in occ_numbers] - return occ_numbers - + return results["international"], results["number"] From 90842c596c5e205951962ab544ed596c6719d325 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 17:00:34 +0100 Subject: [PATCH 07/15] add better data access --- pyscal_rdf/properties.py | 3 --- pyscal_rdf/rdfsystem.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/pyscal_rdf/properties.py b/pyscal_rdf/properties.py index c7ab86a..5253b51 100644 --- a/pyscal_rdf/properties.py +++ b/pyscal_rdf/properties.py @@ -148,6 +148,3 @@ def _get_symmetry_dict(system): results = spglib.get_symmetry_dataset((box, direct_coordinates, atom_types)) return results["international"], results["number"] - - - diff --git a/pyscal_rdf/rdfsystem.py b/pyscal_rdf/rdfsystem.py index 9630289..ebd432b 100644 --- a/pyscal_rdf/rdfsystem.py +++ b/pyscal_rdf/rdfsystem.py @@ -1,6 +1,9 @@ import numpy as np +from functools import partial, update_wrapper + import pyscal3.core as pc from pyscal3.atoms import AttrSetter +import pyscal_rdf.properties as prp from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS @@ -29,7 +32,32 @@ def __init__(self, filename = None, #assign attributes self.schema = AttrSetter() - mapdict = {} + mapdict = { + "material": { + "element_ratio": partial(prp.get_chemical_composition, self), + "crystal_structure": { + "name": partial(prp.get_crystal_structure_name, self), + "spacegroup_symbol": partial(prp.get_spacegroup_symbol, self), + "spacegroup_number": partial(prp.get_spacegroup_number, self), + "unit_cell": { + "bravais_lattice": partial(prp.get_bravais_lattice, self), + "lattice_parameter": partial(prp.get_lattice_parameter, self), + "angle": partial(prp.get_lattice_angle, self), + }, + }, + }, + "simulation_cell": { + "volume": partial(prp.get_cell_volume, self), + "number_of_atoms": partial(prp.get_number_of_atoms, self), + "length": partial(prp.get_simulation_cell_length, self), + "vector": partial(prp.get_simulation_cell_vector, self), + "angle": partial(prp.get_simulation_cell_angle, self), + }, + "atom_attribute": { + "position": partial(prp.get_position, self), + "species": partial(prp.get_species, self), + }, + } self.schema._add_attribute(mapdict) From 21121f9f497a37435f4a3585f1041a1067a0c706 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 17:33:08 +0100 Subject: [PATCH 08/15] remove data --- pyscal_rdf/graph.py | 255 ++++++++++++++------------------------------ 1 file changed, 78 insertions(+), 177 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index de01457..5a89ae1 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -147,126 +147,12 @@ def process_structure(self, structure, format=None): """ if isinstance(structure, System): #self.sysdict = convert_to_dict(structure) - self.sys = structure + self.system = structure elif os.path.exists(structure): sys = System(structure, format=format) #self.sysdict = convert_to_dict(sys) - self.sys = sys - - def data(self, key): - #this method gets info directly from the dict - if key=="ChemicalComposition": - return self.sys.composition - elif key=="CellVolume": - return self.sys.volume - elif key=="NumberOfAtoms": - return self.sys.natoms - elif key=="SimulationCellLengthX": - return self.sys.box_dimensions[0] - elif key=="SimulationCellLengthY": - return self.sys.box_dimensions[1] - elif key=="SimulationCellLengthZ": - return self.sys.box_dimensions[2] - - elif key=="SimulationCellVectorA": - return self.sys.box[0] - elif key=="SimulationCellVectorB": - return self.sys.box[1] - elif key=="SimulationCellVectorC": - return self.sys.box[2] - - elif key=="SimulationCellAngleAlpha": - return prp.get_angle(self.sys.box[0], self.sys.box[1]) - elif key=="SimulationCellAngleBeta": - return prp.get_angle(self.sys.box[1], self.sys.box[2]) - elif key=="SimulationCellAngleGamma": - return prp.get_angle(self.sys.box[2], self.sys.box[0]) - - elif key=="LatticeAngleAlpha": - if self.sys._structure_dict is not None: - return prp.get_angle(self.sys._structure_dict["box"][0], self.sys._structure_dict["box"][1]) - return None - elif key=="LatticeAngleBeta": - if self.sys._structure_dict is not None: - return prp.get_angle(self.sys._structure_dict["box"][1], self.sys._structure_dict["box"][2]) - return None - elif key=="LatticeAngleGamma": - if self.sys._structure_dict is not None: - return prp.get_angle(self.sys._structure_dict["box"][2], self.sys._structure_dict["box"][0]) - return None - - elif key=="Element": - if self.sys.atoms.species[0] is not None: - return self.sys.atoms.species - else: - return self.sys.atoms.types - - elif key=="Coordination": - return prp.get_coordination(self.sys) + self.system = sys - elif key=="Positions": - return self.sys.atoms.positions - - elif key=="LatticeParameter": - if self.sys.atoms._lattice_constant is None: - return [None, None, None] - else: - if self.sys._structure_dict is not None: - return [np.linalg.norm(self.sys._structure_dict["box"][0])*self.sys.atoms._lattice_constant, - np.linalg.norm(self.sys._structure_dict["box"][1])*self.sys.atoms._lattice_constant, - np.linalg.norm(self.sys._structure_dict["box"][2])*self.sys.atoms._lattice_constant] - else: - return [self.sys.atoms._lattice_constant, - self.sys.atoms._lattice_constant, - self.sys.atoms._lattice_constant] - - elif key=="SpaceGroupSymbol": - if self.sys._structure_dict is not None: - symbol, number = prp.get_space_group(self.sys) - return symbol - - elif key=="SpaceGroupNumber": - if self.sys._structure_dict is not None: - symbol, number = prp.get_space_group(self.sys) - return number - else: - return None - - elif key=="CrystalStructureName": - if self.sys._structure_dict is not None: - return self.sys.atoms._lattice - else: - return None - - elif key=="BravaisLattice": - if self.sys._structure_dict is not None: - return prp.get_bravais_lattice(self.sys) - else: - return None - - elif key=="BasisPositions": - if self.sys._structure_dict is not None: - return self.sys._structure_dict['positions'] - else: - return None - - elif key=="BasisOccupancy": - if self.sys._structure_dict is not None: - return prp.get_basis(self.sys) - else: - return None - - elif key=="LatticeVectors": - if self.sys._structure_dict is not None: - return self.sys_structure_dict["box"] - else: - return None - - #if self.sysdict is not None: - # if key in self.sysdict: - # return self.sysdict[key] - return None - def add(self, triple): if str(triple[2].toPython()) != 'None': self.graph.add(triple) @@ -385,9 +271,9 @@ def add_sample(self, name=None): ------- """ - sample_01 = BNode(name) - self.add((sample_01, RDF.type, CMSO.AtomicScaleSample)) - self.sample = sample_01 + sample = BNode(name) + self.add((sample, RDF.type, CMSO.AtomicScaleSample)) + self.sample = sample def add_material(self, name=None): """ @@ -402,10 +288,10 @@ def add_material(self, name=None): ------- """ - material_01 = BNode(name) - self.add((self.sample, CMSO.hasMaterial, material_01)) - self.add((material_01, RDF.type, CMSO.CrystallineMaterial)) - self.material = material_01 + material = BNode(name) + self.add((self.sample, CMSO.hasMaterial, material)) + self.add((material, RDF.type, CMSO.CrystallineMaterial)) + self.material = material def add_chemical_composition(self, name=None): """ @@ -419,7 +305,7 @@ def add_chemical_composition(self, name=None): Returns ------- """ - composition = self.data("ChemicalComposition") + composition = self.system.schema.material.element_ratio() chemical_species = BNode(name) self.add((self.sample, CMSO.hasSpecies, chemical_species)) @@ -444,12 +330,16 @@ def add_simulation_cell(self, name=None): ------- """ - simulation_cell_01 = BNode(name) - self.add((self.sample, CMSO.hasSimulationCell, simulation_cell_01)) - self.add((simulation_cell_01, RDF.type, CMSO.SimulationCell)) - self.add((simulation_cell_01, CMSO.hasVolume, Literal(np.round(self.data("CellVolume"), decimals=2), datatype=XSD.float))) - self.add((self.sample, CMSO.hasNumberOfAtoms, Literal(self.data("NumberOfAtoms"), datatype=XSD.integer))) - self.simulation_cell = simulation_cell_01 + simulation_cell = BNode(name) + self.add((self.sample, CMSO.hasSimulationCell, simulation_cell)) + self.add((simulation_cell, RDF.type, CMSO.SimulationCell)) + self.add((simulation_cell, CMSO.hasVolume, + Literal(np.round(self.system.schema.simulation_cell.volume(), decimals=2), + datatype=XSD.float))) + self.add((self.sample, CMSO.hasNumberOfAtoms, + Literal(self.system.schema.simulation_cell.number_of_atoms(), + datatype=XSD.integer))) + self.simulation_cell = simulation_cell def add_simulation_cell_properties(self, name=None): @@ -469,22 +359,24 @@ def add_simulation_cell_properties(self, name=None): uname = None if name is not None: uname = f'{name}Length' - simulation_cell_length_01 = BNode(uname) - self.add((self.simulation_cell, CMSO.hasLength, simulation_cell_length_01)) - self.add((simulation_cell_length_01, RDF.type, CMSO.SimulationCellLength)) - self.add((simulation_cell_length_01, CMSO.hasLength_x, Literal(self.data("SimulationCellLengthX"), datatype=XSD.float))) - self.add((simulation_cell_length_01, CMSO.hasLength_y, Literal(self.data("SimulationCellLengthY"), datatype=XSD.float))) - self.add((simulation_cell_length_01, CMSO.hasLength_z, Literal(self.data("SimulationCellLengthZ"), datatype=XSD.float))) + simulation_cell_length = BNode(uname) + self.add((self.simulation_cell, CMSO.hasLength, simulation_cell_length)) + data = self.system.schema.simulation_cell.length() + self.add((simulation_cell_length, RDF.type, CMSO.SimulationCellLength)) + self.add((simulation_cell_length, CMSO.hasLength_x, Literal(data[0], datatype=XSD.float))) + self.add((simulation_cell_length, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) + self.add((simulation_cell_length, CMSO.hasLength_z, Literal(data[2], datatype=XSD.float))) uname = None if name is not None: uname = f'{name}Vector01' simulation_cell_vector_01 = BNode(uname) + data = self.system.schema.simulation_cell.vector() self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_01)) self.add((simulation_cell_vector_01, RDF.type, CMSO.SimulationCellVector)) - self.add((simulation_cell_vector_01, CMSO.hasComponent_x, Literal(self.data("SimulationCellVectorA")[0], datatype=XSD.float))) - self.add((simulation_cell_vector_01, CMSO.hasComponent_y, Literal(self.data("SimulationCellVectorA")[1], datatype=XSD.float))) - self.add((simulation_cell_vector_01, CMSO.hasComponent_z, Literal(self.data("SimulationCellVectorA")[2], datatype=XSD.float))) + self.add((simulation_cell_vector_01, CMSO.hasComponent_x, Literal(data[0][0], datatype=XSD.float))) + self.add((simulation_cell_vector_01, CMSO.hasComponent_y, Literal(data[0][1], datatype=XSD.float))) + self.add((simulation_cell_vector_01, CMSO.hasComponent_z, Literal(data[0][2], datatype=XSD.float))) uname = None if name is not None: @@ -492,9 +384,9 @@ def add_simulation_cell_properties(self, name=None): simulation_cell_vector_02 = BNode(uname) self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_02)) self.add((simulation_cell_vector_02, RDF.type, CMSO.SimulationCellVector)) - self.add((simulation_cell_vector_02, CMSO.hasComponent_x, Literal(self.data("SimulationCellVectorB")[0], datatype=XSD.float))) - self.add((simulation_cell_vector_02, CMSO.hasComponent_y, Literal(self.data("SimulationCellVectorB")[1], datatype=XSD.float))) - self.add((simulation_cell_vector_02, CMSO.hasComponent_z, Literal(self.data("SimulationCellVectorB")[2], datatype=XSD.float))) + self.add((simulation_cell_vector_02, CMSO.hasComponent_x, Literal(data[1][0], datatype=XSD.float))) + self.add((simulation_cell_vector_02, CMSO.hasComponent_y, Literal(data[1][1], datatype=XSD.float))) + self.add((simulation_cell_vector_02, CMSO.hasComponent_z, Literal(data[1][2], datatype=XSD.float))) uname = None if name is not None: @@ -502,19 +394,20 @@ def add_simulation_cell_properties(self, name=None): simulation_cell_vector_03 = BNode(uname) self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_03)) self.add((simulation_cell_vector_03, RDF.type, CMSO.SimulationCellVector)) - self.add((simulation_cell_vector_03, CMSO.hasComponent_x, Literal(self.data("SimulationCellVectorC")[0], datatype=XSD.float))) - self.add((simulation_cell_vector_03, CMSO.hasComponent_y, Literal(self.data("SimulationCellVectorC")[1], datatype=XSD.float))) - self.add((simulation_cell_vector_03, CMSO.hasComponent_z, Literal(self.data("SimulationCellVectorC")[2], datatype=XSD.float))) + self.add((simulation_cell_vector_03, CMSO.hasComponent_x, Literal(data[2][0], datatype=XSD.float))) + self.add((simulation_cell_vector_03, CMSO.hasComponent_y, Literal(data[2][1], datatype=XSD.float))) + self.add((simulation_cell_vector_03, CMSO.hasComponent_z, Literal(data[2][2], datatype=XSD.float))) uname = None if name is not None: uname = f'{name}Angle' - simulation_cell_angle_01 = BNode(uname) - self.add((self.simulation_cell, CMSO.hasAngle, simulation_cell_angle_01)) - self.add((simulation_cell_angle_01, RDF.type, CMSO.SimulationCellAngle)) - self.add((simulation_cell_angle_01, CMSO.hasAngle_alpha, Literal(self.data("SimulationCellAngleAlpha"), datatype=XSD.float))) - self.add((simulation_cell_angle_01, CMSO.hasAngle_beta, Literal(self.data("SimulationCellAngleBeta"), datatype=XSD.float))) - self.add((simulation_cell_angle_01, CMSO.hasAngle_gamma, Literal(self.data("SimulationCellAngleGamma"), datatype=XSD.float))) + simulation_cell_angle = BNode(uname) + data = self.system.schema.simulation_cell.angle() + self.add((self.simulation_cell, CMSO.hasAngle, simulation_cell_angle)) + self.add((simulation_cell_angle, RDF.type, CMSO.SimulationCellAngle)) + self.add((simulation_cell_angle, CMSO.hasAngle_alpha, Literal(data[0], datatype=XSD.float))) + self.add((simulation_cell_angle, CMSO.hasAngle_beta, Literal(data[1], datatype=XSD.float))) + self.add((simulation_cell_angle, CMSO.hasAngle_gamma, Literal(data[2], datatype=XSD.float))) def add_crystal_structure(self, name=None): @@ -530,11 +423,13 @@ def add_crystal_structure(self, name=None): ------- """ - crystal_structure_01 = BNode(name) - self.add((self.material, CMSO.hasStructure, crystal_structure_01)) - self.add((crystal_structure_01, RDF.type, CMSO.CrystalStructure)) - self.add((crystal_structure_01, CMSO.hasAltName, Literal(self.data("CrystalStructureName"), datatype=XSD.string))) - self.crystal_structure = crystal_structure_01 + crystal_structure = BNode(name) + self.add((self.material, CMSO.hasStructure, crystal_structure)) + self.add((crystal_structure, RDF.type, CMSO.CrystalStructure)) + self.add((crystal_structure, CMSO.hasAltName, + Literal(self.system.schema.material.crystal_structure.name(), + datatype=XSD.string))) + self.crystal_structure = crystal_structure def add_space_group(self, name=None): """ @@ -548,8 +443,12 @@ def add_space_group(self, name=None): Returns ------- """ - self.add((self.crystal_structure, CMSO.hasSpaceGroupSymbol, Literal(self.data("SpaceGroupSymbol"), datatype=XSD.string))) - self.add((self.crystal_structure, CMSO.hasSpaceGroupNumber, Literal(self.data("SpaceGroupNumber"), datatype=XSD.integer))) + self.add((self.crystal_structure, CMSO.hasSpaceGroupSymbol, + Literal(self.system.schema.material.crystal_structure.spacegroup_symbol(), + datatype=XSD.string))) + self.add((self.crystal_structure, CMSO.hasSpaceGroupNumber, + Literal(self.system.schema.material.crystal_structure.spacegroup_number(), + datatype=XSD.integer))) def add_unit_cell(self, name=None): @@ -565,15 +464,15 @@ def add_unit_cell(self, name=None): ------- """ - unit_cell_01 = BNode(name) - self.add((self.crystal_structure, CMSO.hasUnitCell, unit_cell_01)) - self.add((unit_cell_01, RDF.type, CMSO.UnitCell)) - self.unit_cell = unit_cell_01 + unit_cell = BNode(name) + self.add((self.crystal_structure, CMSO.hasUnitCell, unit_cell)) + self.add((unit_cell, RDF.type, CMSO.UnitCell)) + self.unit_cell = unit_cell #add bravais lattice - bv = None - if self.data("BravaisLattice") is not None: - bv = URIRef(self.data("BravaisLattice")) + bv = self.system.schema.material.crystal_structure.unit_cell.bravais_lattice() + if bv is not None: + bv = URIRef(bv) self.add((self.unit_cell, CMSO.hasBravaisLattice, bv)) def add_lattice_properties(self, name=None): @@ -593,22 +492,24 @@ def add_lattice_properties(self, name=None): uname = None if name is not None: uname = f'{name}LatticeParameter' - lattice_parameter_01 = BNode(uname) - self.add((self.unit_cell, CMSO.hasLatticeParamter, lattice_parameter_01)) - self.add((lattice_parameter_01, RDF.type, CMSO.LatticeParameter)) - self.add((lattice_parameter_01, CMSO.hasLength_x, Literal(self.data("LatticeParameter")[0], datatype=XSD.float))) - self.add((lattice_parameter_01, CMSO.hasLength_y, Literal(self.data("LatticeParameter")[1], datatype=XSD.float))) - self.add((lattice_parameter_01, CMSO.hasLength_z, Literal(self.data("LatticeParameter")[2], datatype=XSD.float))) + data = self.system.schema.material.crystal_structure.unit_cell.lattice_parameter() + lattice_parameter = BNode(uname) + self.add((self.unit_cell, CMSO.hasLatticeParamter, lattice_parameter)) + self.add((lattice_parameter, RDF.type, CMSO.LatticeParameter)) + self.add((lattice_parameter, CMSO.hasLength_x, Literal(data[0], datatype=XSD.float))) + self.add((lattice_parameter, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) + self.add((lattice_parameter, CMSO.hasLength_z, Literal(data[2], datatype=XSD.float))) uname = None if name is not None: uname = f'{name}LatticeAngle' - lattice_angle_01 = BNode(uname) - self.add((self.unit_cell, CMSO.hasAngle, lattice_angle_01)) - self.add((lattice_angle_01, RDF.type, CMSO.LatticeAngle)) - self.add((lattice_angle_01, CMSO.hasAngle_alpha, Literal(self.data("LatticeAngleAlpha"), datatype=XSD.float))) - self.add((lattice_angle_01, CMSO.hasAngle_beta, Literal(self.data("LatticeAngleBeta"), datatype=XSD.float))) - self.add((lattice_angle_01, CMSO.hasAngle_gamma, Literal(self.data("LatticeAngleGamma"), datatype=XSD.float))) + lattice_angle = BNode(uname) + data = self.system.schema.material.crystal_structure.unit_cell.angle() + self.add((self.unit_cell, CMSO.hasAngle, lattice_angle)) + self.add((lattice_angle, RDF.type, CMSO.LatticeAngle)) + self.add((lattice_angle, CMSO.hasAngle_alpha, Literal(data[0], datatype=XSD.float))) + self.add((lattice_angle, CMSO.hasAngle_beta, Literal(data[1], datatype=XSD.float))) + self.add((lattice_angle, CMSO.hasAngle_gamma, Literal(data[2], datatype=XSD.float))) def add_atoms(self, name=None): """ From adc5306bab1483118db2fef7ba1985dcc3e36f3f Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 17:35:12 +0100 Subject: [PATCH 09/15] further fixes to remove data --- pyscal_rdf/graph.py | 70 ++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 5a89ae1..c608147 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -553,27 +553,27 @@ def add_atoms(self, name=None): species_identifier = uuid.uuid4() self.add((species, CMSO.hasIdentifier, Literal(species_identifier, datatype=XSD.string))) - if "velocities" in self.sys.atoms.keys(): - uname = None - if name is not None: - uname = f'{name}_Velocity' - velocity = BNode(uname) - self.add((self.sample, CMSO.hasAttribute, velocity)) - self.add((velocity, RDF.type, CMSO.AtomAttribute)) - self.add((velocity, CMSO.hasName, Literal('Velocity', data_type=XSD.string))) - velocity_identifier = uuid.uuid4() - self.add((velocity, CMSO.hasIdentifier, Literal(velocity_identifier, datatype=XSD.string))) - - if "forces" in self.sys.atoms.keys(): - uname = None - if name is not None: - uname = f'{name}_Force' - force = BNode(uname) - self.add((self.sample, CMSO.hasAttribute, force)) - self.add((force, RDF.type, CMSO.AtomAttribute)) - self.add((force, CMSO.hasName, Literal('Force', data_type=XSD.string))) - force_identifier = uuid.uuid4() - self.add((force, CMSO.hasIdentifier, Literal(force_identifier, datatype=XSD.string))) + #if "velocities" in self.sys.atoms.keys(): + # uname = None + # if name is not None: + # uname = f'{name}_Velocity' + # velocity = BNode(uname) + # self.add((self.sample, CMSO.hasAttribute, velocity)) + # self.add((velocity, RDF.type, CMSO.AtomAttribute)) + # self.add((velocity, CMSO.hasName, Literal('Velocity', data_type=XSD.string))) + # velocity_identifier = uuid.uuid4() + # self.add((velocity, CMSO.hasIdentifier, Literal(velocity_identifier, datatype=XSD.string))) + + #if "forces" in self.sys.atoms.keys(): + # uname = None + # if name is not None: + # uname = f'{name}_Force' + # force = BNode(uname) + # self.add((self.sample, CMSO.hasAttribute, force)) + # self.add((force, RDF.type, CMSO.AtomAttribute)) + # self.add((force, CMSO.hasName, Literal('Force', data_type=XSD.string))) + # force_identifier = uuid.uuid4() + # self.add((force, CMSO.hasIdentifier, Literal(force_identifier, datatype=XSD.string))) @@ -596,27 +596,27 @@ def add_gb(self, gb_dict, name=None): #mark that the structure has a defect - plane_defect_01 = BNode(name) - self.add((self.material, CMSO.hasDefect, plane_defect_01)) + plane_defect = BNode(name) + self.add((self.material, CMSO.hasDefect, plane_defect)) if gb_dict["GBType"] is None: - self.add((plane_defect_01, RDF.type, PLDO.GrainBoundary)) + self.add((plane_defect, RDF.type, PLDO.GrainBoundary)) elif gb_dict["GBType"] == "Twist": - self.add((plane_defect_01, RDF.type, PLDO.TwistGrainBoundary)) + self.add((plane_defect, RDF.type, PLDO.TwistGrainBoundary)) elif gb_dict["GBType"] == "Tilt": - self.add((plane_defect_01, RDF.type, PLDO.TiltGrainBoundary)) + self.add((plane_defect, RDF.type, PLDO.TiltGrainBoundary)) elif gb_dict["GBType"] == "Symmetric Tilt": - self.add((plane_defect_01, RDF.type, PLDO.SymmetricalTiltGrainBoundary)) + self.add((plane_defect, RDF.type, PLDO.SymmetricalTiltGrainBoundary)) elif gb_dict["GBType"] == "Mixed": - self.add((plane_defect_01, RDF.type, PLDO.MixedGrainBoundary)) - self.add((plane_defect_01, PLDO.hasSigmaValue, Literal(gb_dict["sigma"], datatype=XSD.integer))) + self.add((plane_defect, RDF.type, PLDO.MixedGrainBoundary)) + self.add((plane_defect, PLDO.hasSigmaValue, Literal(gb_dict["sigma"], datatype=XSD.integer))) #now mark that the defect is GB #uname = None #if name is not None: # uname = f'{name}GrainBoundaryPlane' #gb_plane_01 = BNode(uname) - self.add((plane_defect_01, PLDO.hasGBPlane, Literal(gb_dict["GBPlane"], + self.add((plane_defect, PLDO.hasGBPlane, Literal(gb_dict["GBPlane"], datatype=XSD.string))) #self.add((gb_plane_01, RDF.type, PLDO.GrainBoundaryPlane)) #self.add((gb_plane_01, PLDO.hasMillerIndices, Literal(gb_dict["GBPlane"], @@ -626,7 +626,7 @@ def add_gb(self, gb_dict, name=None): #if name is not None: # uname = f'{name}RotationAxis' #rotation_axis_01 = BNode(uname) - self.add((plane_defect_01, PLDO.hasRotationAxis, Literal(gb_dict["RotationAxis"], + self.add((plane_defect, PLDO.hasRotationAxis, Literal(gb_dict["RotationAxis"], datatype=XSD.string))) #self.add((rotation_axis_01, RDF.type, PLDO.RotationAxis)) #self.add((rotation_axis_01, PLDO.hasComponentX, Literal(gb_dict["RotationAxis"][0], datatype=XSD.float))) @@ -637,7 +637,7 @@ def add_gb(self, gb_dict, name=None): #if name is not None: # uname = f'{name}MisorientationAngle' #misorientation_angle_01 = BNode(uname) - self.add((plane_defect_01, PLDO.hasMisorientationAngle, Literal(gb_dict["MisorientationAngle"], datatype=XSD.float))) + self.add((plane_defect, PLDO.hasMisorientationAngle, Literal(gb_dict["MisorientationAngle"], datatype=XSD.float))) #self.add((misorientation_angle_01, RDF.type, PLDO.MisorientationAngle)) #self.add((misorientation_angle_01, PLDO.hasAngle, Literal(gb_dict["MisorientationAngle"], datatype=XSD.float))) @@ -657,9 +657,9 @@ def add_vacancy(self, concentration, number=None, name=None): ------- """ - vacancy_01 = BNode(name) - self.add((self.material, CMSO.hasDefect, vacancy_01)) - self.add((vacancy_01, RDF.type, PODO.Vacancy)) + vacancy = BNode(name) + self.add((self.material, CMSO.hasDefect, vacancy)) + self.add((vacancy, RDF.type, PODO.Vacancy)) self.add((self.simulation_cell, PODO.hasVacancyConcentration, Literal(concentration, datatype=XSD.float))) if number is not None: self.add((self.simulation_cell, PODO.hasNumberOfVacancies, Literal(number, datatype=XSD.integer))) From 3eed167fbaf3a2d9e2cf63f5e0433aa8fba2bbd8 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 17:47:17 +0100 Subject: [PATCH 10/15] fix minor bugs --- pyscal_rdf/graph.py | 12 +++++++----- pyscal_rdf/properties.py | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index c608147..0f96df1 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -111,7 +111,7 @@ def __init__(self, graph_file=None, raise ValueError("store should be pyiron_project, SQLAlchemy, or Memory") #start the storage - _set_structure_store(self.structure_store) + _setup_structure_store(self.structure_store) #start binding self.graph.bind("cmso", CMSO) @@ -314,6 +314,8 @@ def add_chemical_composition(self, name=None): for e, r in composition.keys(): if e in element_indetifiers.keys(): element = URIRef(element_indetifiers[e]) + self.add((chemical_species, CMSO.hasElement, element)) + self.add((element, RDF.type, CMSO.Element)) self.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) self.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) @@ -531,25 +533,25 @@ def add_atoms(self, name=None): #start a path to store the data #samples are BNodes, so names may not be unique, therefore we create one - if "positions" in self.sys.atoms.keys(): + if "positions" in self.system.atoms.keys(): uname = None if name is not None: uname = f'{name}_Position' position = BNode(uname) self.add((self.sample, CMSO.hasAttribute, position)) self.add((position, RDF.type, CMSO.AtomAttribute)) - self.add((position, CMSO.hasName, Literal('Position', data_type=XSD.string))) + self.add((position, CMSO.hasName, Literal('Position', datatype=XSD.string))) position_identifier = uuid.uuid4() self.add((position, CMSO.hasIdentifier, Literal(position_identifier, datatype=XSD.string))) - if "species" in self.sys.atoms.keys(): + if "species" in self.system.atoms.keys(): uname = None if name is not None: uname = f'{name}_Species' species = BNode(uname) self.add((self.sample, CMSO.hasAttribute, species)) self.add((species, RDF.type, CMSO.AtomAttribute)) - self.add((species, CMSO.hasName, Literal('Species', data_type=XSD.string))) + self.add((species, CMSO.hasName, Literal('Species', datatype=XSD.string))) species_identifier = uuid.uuid4() self.add((species, CMSO.hasIdentifier, Literal(species_identifier, datatype=XSD.string))) diff --git a/pyscal_rdf/properties.py b/pyscal_rdf/properties.py index 5253b51..8e54c8a 100644 --- a/pyscal_rdf/properties.py +++ b/pyscal_rdf/properties.py @@ -87,7 +87,7 @@ def get_basis_occupancy(system): occ_numbers = [tdict[x] for x in occ_numbers] return occ_numbers -def get_lattice_vectors(system): +def get_lattice_vector(system): if system._structure_dict is None: return None return system._structure_dict["box"] @@ -96,13 +96,13 @@ def get_spacegroup_symbol(system): if system._structure_dict is None: return None results = _get_symmetry_dict(system) - return results["international"] + return results[0] def get_spacegroup_number(system): if system._structure_dict is None: return None results = _get_symmetry_dict(system) - return results["number"] + return results[1] # ATOM attributes #-------------------------------------------- From 05f62611d0913d619d1366700a21189991b01437 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 18:45:00 +0100 Subject: [PATCH 11/15] update graphs --- pyscal_rdf/data/element.yml | 6 +++--- pyscal_rdf/graph.py | 4 ++-- pyscal_rdf/visualize.py | 23 +++++++++++++++++------ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/pyscal_rdf/data/element.yml b/pyscal_rdf/data/element.yml index 271fa97..577984e 100644 --- a/pyscal_rdf/data/element.yml +++ b/pyscal_rdf/data/element.yml @@ -12,9 +12,9 @@ Co: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:27638 Cr: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:28073 Cs: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:30514 Cu: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:28694 -Dy: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=33377 -Er: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=33379 -Eu: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=32999 +Dy: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:33377 +Er: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:33379 +Eu: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:32999 Fe: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:18248 Gd: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:33375 Ge: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:30441 diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 0f96df1..4cf55b4 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -310,8 +310,8 @@ def add_chemical_composition(self, name=None): chemical_species = BNode(name) self.add((self.sample, CMSO.hasSpecies, chemical_species)) self.add((chemical_species, RDF.type, CMSO.ChemicalSpecies)) - - for e, r in composition.keys(): + + for e, r in composition.items(): if e in element_indetifiers.keys(): element = URIRef(element_indetifiers[e]) self.add((chemical_species, CMSO.hasElement, element)) diff --git a/pyscal_rdf/visualize.py b/pyscal_rdf/visualize.py index beac451..26252e5 100644 --- a/pyscal_rdf/visualize.py +++ b/pyscal_rdf/visualize.py @@ -8,12 +8,23 @@ def get_title_from_BNode(x): return x.toPython() -def get_string_from_URI(x, ): - raw = x.toPython().split("#") - if len(raw)>1: - return raw[-1] - else: - return ".".join(x.toPython().split("/")[-2:]) +def get_string_from_URI(x): + raw = x.toPython() + #first try splitting by # + rawsplit = raw.split("#") + if len(rawsplit) > 1: + return rawsplit[-1] + #try splitting by = for chebi values + if 'CHEBI' in raw: + rawsplit = raw.split("=") + rawsplit = rawsplit[-1].split(":") + if len(rawsplit) > 1: + return ".".join(rawsplit[-2:]) + + #just a normal url split now + rawsplit = raw.split("/") + if len(rawsplit) > 1: + return ".".join(rawsplit[-2:]) def parse_object(x): if isinstance(x, BNode): From ae4362d9719ec1ce996b8f14b38a19dae6507aa6 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 19:00:15 +0100 Subject: [PATCH 12/15] remove names --- pyscal_rdf/graph.py | 57 +++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 4cf55b4..592e983 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -191,19 +191,19 @@ def add_structure_to_graph(self, self.process_structure(structure, format=format) - #now add to graph - if name_index is None: - name_index = self.n_samples + 1 - - self.create_graph(names=names, name_index=name_index) + if names: + if name_index is None: + name_index = self.n_samples + 1 + self._name = f'sample_{name_index}' + else: + self._name = uuid.uuid4() + + self.create_graph() structure.sample = self.sample #structure._atom_ids = copy.copy(self._atom_ids) structure.graph = self - def _generate_names(self, names=False, name_index=1): - pass - - def create_graph(self, names=False, name_index=1): + def create_graph(self): """ Create the RDF Graph from the data stored @@ -218,32 +218,19 @@ def create_graph(self, names=False, name_index=1): Returns ------- None - """ - - if names: - name_list = [f'Sample_{name_index}', f'Material_{name_index}', - f'ChemicalComposition_{name_index}', f'SimulationCell_{name_index}', - f'SimulationCell_{name_index}', f'CrystalStructure_{name_index}', - f'SpaceGroup_{name_index}', f'UnitCell_{name_index}', - f'UnitCell_{name_index}', f'Atom_{name_index}'] - else: - name_list = [None, None, - None, None, - None, None, - None, None, - None, None] - - self.add_sample(name=name_list[0]) - self.add_material(name=name_list[1]) - self.add_chemical_composition(name=name_list[2]) - self.add_simulation_cell(name=name_list[3]) - self.add_simulation_cell_properties(name=name_list[4]) - self.add_crystal_structure(name=name_list[5]) - self.add_space_group(name=name_list[6]) - self.add_unit_cell(name=name_list[7]) - self.add_lattice_properties(name=name_list[8]) - self.add_atoms(name=name_list[9]) - + """ + self.add_sample() + self.add_material() + self.add_chemical_composition() + self.add_simulation_cell() + self.add_simulation_cell_properties() + self.add_crystal_structure() + self.add_space_group() + self.add_unit_cell() + self.add_lattice_properties() + self.add_atoms() + + #extra triples self.add((CMSO.SimulationCellLength, RDFS.subClassOf, CMSO.Length)) self.add((CMSO.LatticeParameter, RDFS.subClassOf, CMSO.Length)) self.add((CMSO.Length, CMSO.hasUnit, URIRef("https://qudt.org/2.1/vocab/unit#ANGSTROM"))) From bce40c2d57237f47760914d8e317d453a1000a64 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 19:14:31 +0100 Subject: [PATCH 13/15] remove BNodes --- pyscal_rdf/graph.py | 100 +++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 61 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 592e983..65bbc57 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -245,7 +245,7 @@ def create_graph(self): self.add((CMSO.Vector, CMSO.hasUnit, URIRef("https://qudt.org/2.1/vocab/unit#ANGSTROM"))) - def add_sample(self, name=None): + def add_sample(self): """ Add a CMSO Sample object @@ -262,7 +262,7 @@ def add_sample(self, name=None): self.add((sample, RDF.type, CMSO.AtomicScaleSample)) self.sample = sample - def add_material(self, name=None): + def add_material(self): """ Add a CMSO Material object @@ -280,7 +280,7 @@ def add_material(self, name=None): self.add((material, RDF.type, CMSO.CrystallineMaterial)) self.material = material - def add_chemical_composition(self, name=None): + def add_chemical_composition(self): """ Add chemical composition @@ -294,7 +294,7 @@ def add_chemical_composition(self, name=None): """ composition = self.system.schema.material.element_ratio() - chemical_species = BNode(name) + chemical_species = URIRef(f'{self._name}_ChemicalSpecies') self.add((self.sample, CMSO.hasSpecies, chemical_species)) self.add((chemical_species, RDF.type, CMSO.ChemicalSpecies)) @@ -306,7 +306,7 @@ def add_chemical_composition(self, name=None): self.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) self.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) - def add_simulation_cell(self, name=None): + def add_simulation_cell(self): """ Add a CMSO SimulationCell @@ -319,7 +319,7 @@ def add_simulation_cell(self, name=None): ------- """ - simulation_cell = BNode(name) + simulation_cell = URIRef(f'{self._name}_SimulationCell') self.add((self.sample, CMSO.hasSimulationCell, simulation_cell)) self.add((simulation_cell, RDF.type, CMSO.SimulationCell)) self.add((simulation_cell, CMSO.hasVolume, @@ -331,7 +331,7 @@ def add_simulation_cell(self, name=None): self.simulation_cell = simulation_cell - def add_simulation_cell_properties(self, name=None): + def add_simulation_cell_properties(self): """ Add a CMSO SimulationCell properties such as SimulationCellLength, and Vectors. @@ -344,11 +344,7 @@ def add_simulation_cell_properties(self, name=None): Returns ------- """ - - uname = None - if name is not None: - uname = f'{name}Length' - simulation_cell_length = BNode(uname) + simulation_cell_length = URIRef(f'{self._name}_SimulationCellLength') self.add((self.simulation_cell, CMSO.hasLength, simulation_cell_length)) data = self.system.schema.simulation_cell.length() self.add((simulation_cell_length, RDF.type, CMSO.SimulationCellLength)) @@ -356,10 +352,7 @@ def add_simulation_cell_properties(self, name=None): self.add((simulation_cell_length, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) self.add((simulation_cell_length, CMSO.hasLength_z, Literal(data[2], datatype=XSD.float))) - uname = None - if name is not None: - uname = f'{name}Vector01' - simulation_cell_vector_01 = BNode(uname) + simulation_cell_vector_01 = URIRef(f'{self._name}_SimulationCellVector_1') data = self.system.schema.simulation_cell.vector() self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_01)) self.add((simulation_cell_vector_01, RDF.type, CMSO.SimulationCellVector)) @@ -367,30 +360,21 @@ def add_simulation_cell_properties(self, name=None): self.add((simulation_cell_vector_01, CMSO.hasComponent_y, Literal(data[0][1], datatype=XSD.float))) self.add((simulation_cell_vector_01, CMSO.hasComponent_z, Literal(data[0][2], datatype=XSD.float))) - uname = None - if name is not None: - uname = f'{name}Vector02' - simulation_cell_vector_02 = BNode(uname) + simulation_cell_vector_02 = URIRef(f'{self._name}_SimulationCellVector_2') self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_02)) self.add((simulation_cell_vector_02, RDF.type, CMSO.SimulationCellVector)) self.add((simulation_cell_vector_02, CMSO.hasComponent_x, Literal(data[1][0], datatype=XSD.float))) self.add((simulation_cell_vector_02, CMSO.hasComponent_y, Literal(data[1][1], datatype=XSD.float))) self.add((simulation_cell_vector_02, CMSO.hasComponent_z, Literal(data[1][2], datatype=XSD.float))) - uname = None - if name is not None: - uname = f'{name}Vector03' - simulation_cell_vector_03 = BNode(uname) + simulation_cell_vector_03 = URIRef(f'{self._name}_SimulationCellVector_3') self.add((self.simulation_cell, CMSO.hasVector, simulation_cell_vector_03)) self.add((simulation_cell_vector_03, RDF.type, CMSO.SimulationCellVector)) self.add((simulation_cell_vector_03, CMSO.hasComponent_x, Literal(data[2][0], datatype=XSD.float))) self.add((simulation_cell_vector_03, CMSO.hasComponent_y, Literal(data[2][1], datatype=XSD.float))) self.add((simulation_cell_vector_03, CMSO.hasComponent_z, Literal(data[2][2], datatype=XSD.float))) - uname = None - if name is not None: - uname = f'{name}Angle' - simulation_cell_angle = BNode(uname) + simulation_cell_angle = URIRef(f'{self._name}_SimulationCellAngle') data = self.system.schema.simulation_cell.angle() self.add((self.simulation_cell, CMSO.hasAngle, simulation_cell_angle)) self.add((simulation_cell_angle, RDF.type, CMSO.SimulationCellAngle)) @@ -399,7 +383,7 @@ def add_simulation_cell_properties(self, name=None): self.add((simulation_cell_angle, CMSO.hasAngle_gamma, Literal(data[2], datatype=XSD.float))) - def add_crystal_structure(self, name=None): + def add_crystal_structure(self): """ Add a CMSO Crystal Structure @@ -412,7 +396,7 @@ def add_crystal_structure(self, name=None): ------- """ - crystal_structure = BNode(name) + crystal_structure = URIRef(f'{self._name}_CrystalStructure') self.add((self.material, CMSO.hasStructure, crystal_structure)) self.add((crystal_structure, RDF.type, CMSO.CrystalStructure)) self.add((crystal_structure, CMSO.hasAltName, @@ -420,7 +404,7 @@ def add_crystal_structure(self, name=None): datatype=XSD.string))) self.crystal_structure = crystal_structure - def add_space_group(self, name=None): + def add_space_group(self): """ Add a CMSO Space Group @@ -440,7 +424,7 @@ def add_space_group(self, name=None): datatype=XSD.integer))) - def add_unit_cell(self, name=None): + def add_unit_cell(self): """ Add a CMSO Unit Cell @@ -453,7 +437,7 @@ def add_unit_cell(self, name=None): ------- """ - unit_cell = BNode(name) + unit_cell = URIRef(f'{self._name}_UnitCell') self.add((self.crystal_structure, CMSO.hasUnitCell, unit_cell)) self.add((unit_cell, RDF.type, CMSO.UnitCell)) self.unit_cell = unit_cell @@ -464,7 +448,7 @@ def add_unit_cell(self, name=None): bv = URIRef(bv) self.add((self.unit_cell, CMSO.hasBravaisLattice, bv)) - def add_lattice_properties(self, name=None): + def add_lattice_properties(self): """ Add CMSO lattice properties such as Lattice Parameter, and its lengths and angles. @@ -477,22 +461,15 @@ def add_lattice_properties(self, name=None): Returns ------- """ - - uname = None - if name is not None: - uname = f'{name}LatticeParameter' data = self.system.schema.material.crystal_structure.unit_cell.lattice_parameter() - lattice_parameter = BNode(uname) + lattice_parameter = URIRef(f'{self._name}_LatticeParameter') self.add((self.unit_cell, CMSO.hasLatticeParamter, lattice_parameter)) self.add((lattice_parameter, RDF.type, CMSO.LatticeParameter)) self.add((lattice_parameter, CMSO.hasLength_x, Literal(data[0], datatype=XSD.float))) self.add((lattice_parameter, CMSO.hasLength_y, Literal(data[1], datatype=XSD.float))) self.add((lattice_parameter, CMSO.hasLength_z, Literal(data[2], datatype=XSD.float))) - uname = None - if name is not None: - uname = f'{name}LatticeAngle' - lattice_angle = BNode(uname) + lattice_angle = URIRef(f'{self._name}_LatticeAngle') data = self.system.schema.material.crystal_structure.unit_cell.angle() self.add((self.unit_cell, CMSO.hasAngle, lattice_angle)) self.add((lattice_angle, RDF.type, CMSO.LatticeAngle)) @@ -500,7 +477,7 @@ def add_lattice_properties(self, name=None): self.add((lattice_angle, CMSO.hasAngle_beta, Literal(data[1], datatype=XSD.float))) self.add((lattice_angle, CMSO.hasAngle_gamma, Literal(data[2], datatype=XSD.float))) - def add_atoms(self, name=None): + def add_atoms(self): """ Add Atoms including their species and positions @@ -521,10 +498,7 @@ def add_atoms(self, name=None): #samples are BNodes, so names may not be unique, therefore we create one if "positions" in self.system.atoms.keys(): - uname = None - if name is not None: - uname = f'{name}_Position' - position = BNode(uname) + position = URIRef(f'{self._name}_Position') self.add((self.sample, CMSO.hasAttribute, position)) self.add((position, RDF.type, CMSO.AtomAttribute)) self.add((position, CMSO.hasName, Literal('Position', datatype=XSD.string))) @@ -532,10 +506,7 @@ def add_atoms(self, name=None): self.add((position, CMSO.hasIdentifier, Literal(position_identifier, datatype=XSD.string))) if "species" in self.system.atoms.keys(): - uname = None - if name is not None: - uname = f'{name}_Species' - species = BNode(uname) + species = URIRef(f'{self._name}_Species') self.add((self.sample, CMSO.hasAttribute, species)) self.add((species, RDF.type, CMSO.AtomAttribute)) self.add((species, CMSO.hasName, Literal('Species', datatype=XSD.string))) @@ -567,7 +538,7 @@ def add_atoms(self, name=None): - def add_gb(self, gb_dict, name=None): + def add_gb(self, gb_dict): """ Add GB details which will be annotated using PLDO @@ -583,21 +554,28 @@ def add_gb(self, gb_dict, name=None): ------- """ - #mark that the structure has a defect - - plane_defect = BNode(name) - self.add((self.material, CMSO.hasDefect, plane_defect)) - + #mark that the structure has a defect if gb_dict["GBType"] is None: + plane_defect = URIRef(f'{self._name}_GrainBoundary') self.add((plane_defect, RDF.type, PLDO.GrainBoundary)) + elif gb_dict["GBType"] == "Twist": + plane_defect = URIRef(f'{self._name}_TwistGrainBoundary') self.add((plane_defect, RDF.type, PLDO.TwistGrainBoundary)) + elif gb_dict["GBType"] == "Tilt": + plane_defect = URIRef(f'{self._name}_TiltGrainBoundary') self.add((plane_defect, RDF.type, PLDO.TiltGrainBoundary)) + elif gb_dict["GBType"] == "Symmetric Tilt": + plane_defect = URIRef(f'{self._name}_SymmetricalTiltGrainBoundary') self.add((plane_defect, RDF.type, PLDO.SymmetricalTiltGrainBoundary)) + elif gb_dict["GBType"] == "Mixed": + plane_defect = URIRef(f'{self._name}_MixedGrainBoundary') self.add((plane_defect, RDF.type, PLDO.MixedGrainBoundary)) + + self.add((self.material, CMSO.hasDefect, plane_defect)) self.add((plane_defect, PLDO.hasSigmaValue, Literal(gb_dict["sigma"], datatype=XSD.integer))) #now mark that the defect is GB @@ -630,7 +608,7 @@ def add_gb(self, gb_dict, name=None): #self.add((misorientation_angle_01, RDF.type, PLDO.MisorientationAngle)) #self.add((misorientation_angle_01, PLDO.hasAngle, Literal(gb_dict["MisorientationAngle"], datatype=XSD.float))) - def add_vacancy(self, concentration, number=None, name=None): + def add_vacancy(self, concentration, number=None): """ Add Vacancy details which will be annotated by PODO @@ -646,7 +624,7 @@ def add_vacancy(self, concentration, number=None, name=None): ------- """ - vacancy = BNode(name) + vacancy = URIRef(f'{self._name}_Vacancy') self.add((self.material, CMSO.hasDefect, vacancy)) self.add((vacancy, RDF.type, PODO.Vacancy)) self.add((self.simulation_cell, PODO.hasVacancyConcentration, Literal(concentration, datatype=XSD.float))) @@ -657,7 +635,7 @@ def add_vacancy(self, concentration, number=None, name=None): def add_calculated_quantity(self, propertyname, value, unit=None, sample=None): - prop = BNode() + prop = URIRef(f'{self._name}_{propertyname}') if sample is None: sample = self.sample self.add((sample, CMSO.hasCalculatedProperty, prop)) From 40fae5ed4409b0c299f5c51d212116235c37158b Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 19:15:51 +0100 Subject: [PATCH 14/15] remove stray BNodes --- pyscal_rdf/graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 65bbc57..9167460 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -258,7 +258,7 @@ def add_sample(self): ------- """ - sample = BNode(name) + sample = URIRef(f'{self._name}') self.add((sample, RDF.type, CMSO.AtomicScaleSample)) self.sample = sample @@ -275,7 +275,7 @@ def add_material(self): ------- """ - material = BNode(name) + material = URIRef(f'{self._name}_Material') self.add((self.sample, CMSO.hasMaterial, material)) self.add((material, RDF.type, CMSO.CrystallineMaterial)) self.material = material From f5d8dd0dd93147e33cb3895a8dd2bd40892249ff Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 4 Dec 2023 19:24:31 +0100 Subject: [PATCH 15/15] update visualisation --- pyscal_rdf/graph.py | 2 +- pyscal_rdf/visualize.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 9167460..c4123c6 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -196,7 +196,7 @@ def add_structure_to_graph(self, name_index = self.n_samples + 1 self._name = f'sample_{name_index}' else: - self._name = uuid.uuid4() + self._name = str(uuid.uuid4()) self.create_graph() structure.sample = self.sample diff --git a/pyscal_rdf/visualize.py b/pyscal_rdf/visualize.py index 26252e5..39f5c24 100644 --- a/pyscal_rdf/visualize.py +++ b/pyscal_rdf/visualize.py @@ -9,28 +9,37 @@ def get_title_from_BNode(x): return x.toPython() def get_string_from_URI(x): + """ + Extract a presentable string from URI + + Also differentiate between fixed notes and URIs, and assign color + """ raw = x.toPython() #first try splitting by # rawsplit = raw.split("#") if len(rawsplit) > 1: - return rawsplit[-1] + return rawsplit[-1], "URIRef" + #try splitting by = for chebi values if 'CHEBI' in raw: rawsplit = raw.split("=") rawsplit = rawsplit[-1].split(":") if len(rawsplit) > 1: - return ".".join(rawsplit[-2:]) + return ".".join(rawsplit[-2:]), "URIRef" #just a normal url split now rawsplit = raw.split("/") if len(rawsplit) > 1: - return ".".join(rawsplit[-2:]) + return ".".join(rawsplit[-2:]), "URIRef" + + #none of the conditions, worked, which means its a hex string + return raw, "BNode" def parse_object(x): if isinstance(x, BNode): return get_title_from_BNode(x), "BNode" elif isinstance(x, URIRef): - return get_string_from_URI(x), "URIRef" + return get_string_from_URI(x) elif isinstance(x, Literal): return str(x.title()), "Literal"