Skip to content

Commit

Permalink
Merge pull request #84 from pyscal/strict_triple_checking
Browse files Browse the repository at this point in the history
Strict triple checking
  • Loading branch information
srmnitc authored Apr 16, 2024
2 parents 9b92da0 + 0fe7669 commit 94b3917
Show file tree
Hide file tree
Showing 13 changed files with 6,187 additions and 3,640 deletions.
1 change: 1 addition & 0 deletions atomrdf/data/asmo.owl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<terms:creator>https://orcid.org/0000-0001-7564-7990</terms:creator>
<terms:description>ASMO is an ontology that aims to define the concepts needed to describe commonly used atomic scale simulation methods, i.e. density functional theory, molecular dynamics, Monte Carlo methods, etc. ASMO uses the Provenance Ontology (PROV-O) to describe the simulation process.</terms:description>
<terms:title>Atomistic Simulation Methods Ontology (ASMO)</terms:title>
<owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.0.1</owl:versionInfo>
</owl:Ontology>


Expand Down
95 changes: 59 additions & 36 deletions atomrdf/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
object is stored in triplets.
"""

from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS
from rdflib import Graph, Literal, XSD, RDF, RDFS, BNode, URIRef

import os
import numpy as np
Expand All @@ -29,11 +29,7 @@
from atomrdf.stores import create_store
import atomrdf.json_io as json_io



CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/")
PLDO = Namespace("http://purls.helmholtz-metadaten.de/pldo/")
PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/")
from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO

#read element data file
file_location = os.path.dirname(__file__).split('/')
Expand Down Expand Up @@ -112,11 +108,38 @@ def add_structure(self, structure):
structure.graph = self
structure.to_graph()

def _modify_triple(self, triple):
    """
    Swap ontology terms in a sequence of terms for their namespace objects.

    Parameters
    ----------
    triple : iterable
        Terms to inspect. Despite the name, any number of terms is
        accepted; `value` passes a pair.

    Returns
    -------
    tuple
        The same terms in the same order, where every term whose class
        is named ``OntoTerm`` is replaced by its ``namespace_object``
        attribute. All other terms (including ``None``) pass through
        untouched.
    """
    # The class is matched by name, so no import of OntoTerm is needed here.
    return tuple(
        term.namespace_object if type(term).__name__ == 'OntoTerm' else term
        for term in triple
    )

def add(self, triple):
    """
    Add a triple to the underlying graph.

    The terms are first passed through `_modify_triple`, so both plain
    RDFLib terms and ontology terms may be given. The triple is added
    exactly once, in its converted form.

    Parameters
    ----------
    triple : tuple
        (subject, predicate, object). After conversion the object must
        provide a ``toPython()`` method.

    Returns
    -------
    None

    Notes
    -----
    A triple is silently skipped when the Python value of its object
    renders as the string ``'None'``, i.e. when there is no value to store.
    """
    modified_triple = self._modify_triple(triple)
    # only the converted triple is stored; objects without a value are dropped
    if str(modified_triple[2].toPython()) != 'None':
        self.graph.add(modified_triple)


def triples(self, triple):
    """
    Query the underlying graph for triples matching a pattern.

    Parameters
    ----------
    triple : tuple
        (subject, predicate, object) pattern. The terms are passed
        through `_modify_triple` before the query is issued.

    Returns
    -------
    Whatever ``self.graph.triples`` returns for the converted pattern.
    """
    pattern = self._modify_triple(triple)
    matches = self.graph.triples(pattern)
    return matches

def value(self, arg1, arg2):
    """
    Look up a single value in the underlying graph.

    Parameters
    ----------
    arg1, arg2 :
        The two positional terms handed to ``self.graph.value``, in this
        order. Both are passed through `_modify_triple` first.

    Returns
    -------
    Whatever ``self.graph.value`` returns for the converted terms.
    """
    first, second = self._modify_triple((arg1, arg2))
    return self.graph.value(first, second)

def remove(self, triple):
    """
    Remove triples matching a pattern from the underlying graph.

    Parameters
    ----------
    triple : tuple
        (subject, predicate, object) pattern. The terms are passed
        through `_modify_triple` before removal.

    Returns
    -------
    Whatever ``self.graph.remove`` returns for the converted pattern.
    """
    pattern = self._modify_triple(triple)
    outcome = self.graph.remove(pattern)
    return outcome


def _initialize_graph(self):
"""
Create the RDF Graph from the data stored
Expand Down Expand Up @@ -159,19 +182,19 @@ def add_calculated_quantity(self, sample, propertyname, value, unit=None):


def inspect_sample(self):
natoms = self.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
material = list([k[2] for k in self.graph.triples((sample, CMSO.hasMaterial, None))])[0]
defects = list([k[2] for k in self.graph.triples((material, CMSO.hasDefect, None))])
composition = list([k[2].toPython() for k in self.graph.triples((material, CMSO.hasElementRatio, None))])
crystalstructure = self.graph.value(material, CMSO.hasStructure)
spacegroupsymbol = self.graph.value(crystalstructure, CMSO.hasSpaceGroupSymbol).toPython()

lattice = self.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
defect_types = list([self.graph.value(d, RDF.type).toPython() for d in defects])
prop_nodes = list([k[2] for k in self.graph.triples((sample, CMSO.hasCalculatedProperty, None))])
props = list([self.graph.value(prop_node, RDFS.label) for prop_node in prop_nodes])
propvals = list([self.graph.value(d, CMSO.hasValue).toPython() for d in prop_nodes])
units = list([self.graph.value(d, CMSO.hasUnit).toPython() for d in prop_nodes])
natoms = self.value(sample, CMSO.hasNumberOfAtoms).toPython()
material = list([k[2] for k in self.triples((sample, CMSO.hasMaterial, None))])[0]
defects = list([k[2] for k in self.triples((material, CMSO.hasDefect, None))])
composition = list([k[2].toPython() for k in self.triples((material, CMSO.hasElementRatio, None))])
crystalstructure = self.value(material, CMSO.hasStructure)
spacegroupsymbol = self.value(crystalstructure, CMSO.hasSpaceGroupSymbol).toPython()

lattice = self.value(sample, CMSO.hasNumberOfAtoms).toPython()
defect_types = list([self.value(d, RDF.type).toPython() for d in defects])
prop_nodes = list([k[2] for k in self.triples((sample, CMSO.hasCalculatedProperty, None))])
props = list([self.value(prop_node, RDFS.label) for prop_node in prop_nodes])
propvals = list([self.value(d, CMSO.hasValue).toPython() for d in prop_nodes])
units = list([self.value(d, CMSO.hasUnit).toPython() for d in prop_nodes])
st = []
st.append(f'Sample with {natoms} atoms.\n')
st.append("Material:\n")
Expand Down Expand Up @@ -330,12 +353,12 @@ def archive(self, package_name, format='turtle', compress=True):

#now go through each sample, and copy the file, at the same time fix the paths
for sample in self.samples:
filepath = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
shutil.copy(filepath, structure_store)

#now we have to remove the old path, and fix new
for val in ['Position', 'Species']:
self.graph.remove((URIRef(f'{sample}_{val}'), CMSO.hasPath, None))
self.remove((URIRef(f'{sample}_{val}'), CMSO.hasPath, None))

#assign corrected path
new_relpath = "/".join(['rdf_structure_store', filepath.split('/')[-1]])
Expand Down Expand Up @@ -428,20 +451,20 @@ def n_samples(self):
Number of samples in the Graph
"""

return len([x for x in self.graph.triples((None, RDF.type, CMSO.AtomicScaleSample))])
return len([x for x in self.triples((None, RDF.type, CMSO.AtomicScaleSample))])

@property
def samples(self):
    """
    Returns a list of all Samples in the graph

    Returns
    -------
    list
        Subject of every triple whose predicate is ``RDF.type`` and whose
        object is ``CMSO.AtomicScaleSample``.
    """
    # Query through self.triples (not self.graph.triples) so that the
    # pattern terms are converted by _modify_triple before the lookup.
    sample_pattern = (None, RDF.type, CMSO.AtomicScaleSample)
    return [found[0] for found in self.triples(sample_pattern)]

def iterate_graph(self, item, create_new_graph=False):
if create_new_graph:
self.sgraph = KnowledgeGraph()
triples = list(self.graph.triples((item, None, None)))
triples = list(self.triples((item, None, None)))
for triple in triples:
self.sgraph.graph.add(triple)
self.iterate_graph(triple[2])
Expand All @@ -467,7 +490,7 @@ def get_sample(self, sample, no_atoms=False):

self.iterate_graph(sample, create_new_graph=True)
if no_atoms:
na = self.sgraph.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
na = self.sgraph.value(sample, CMSO.hasNumberOfAtoms).toPython()
return self.sgraph, na
return self.sgraph

Expand All @@ -486,18 +509,18 @@ def get_system_from_sample(self, sample):
corresponding system
"""

simcell = self.graph.value(sample, CMSO.hasSimulationCell)
simcell = self.value(sample, CMSO.hasSimulationCell)
cell_vectors = [[], [], []]

for s in self.graph.triples((simcell, CMSO.hasVector, None)):
cell_vectors[0].append(self.graph.value(s[2], CMSO.hasComponent_x).toPython())
cell_vectors[1].append(self.graph.value(s[2], CMSO.hasComponent_y).toPython())
cell_vectors[2].append(self.graph.value(s[2], CMSO.hasComponent_z).toPython())
for s in self.triples((simcell, CMSO.hasVector, None)):
cell_vectors[0].append(self.value(s[2], CMSO.hasComponent_x).toPython())
cell_vectors[1].append(self.value(s[2], CMSO.hasComponent_y).toPython())
cell_vectors[2].append(self.value(s[2], CMSO.hasComponent_z).toPython())

#cell_vectors
filepath = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
position_identifier = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasIdentifier).toPython()
species_identifier = self.graph.value(URIRef(f'{sample}_Species'), CMSO.hasIdentifier).toPython()
filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
position_identifier = self.value(URIRef(f'{sample}_Position'), CMSO.hasIdentifier).toPython()
species_identifier = self.value(URIRef(f'{sample}_Species'), CMSO.hasIdentifier).toPython()

#open the file for reading
with open(filepath, 'r') as fin:
Expand Down
35 changes: 35 additions & 0 deletions atomrdf/namespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
from rdflib import Literal, URIRef
from rdflib import Namespace as RDFLibNamespace
from pyscal3.atoms import AttrSetter

from atomrdf.network.network import OntologyNetwork

class Namespace(AttrSetter, RDFLibNamespace):
    """
    Namespace whose terms are collected from an ontology file.

    The ontology is loaded into an `OntologyNetwork`; every class, object
    property and data property whose ``namespace`` equals the ontology name
    is registered as an attribute, keyed by its ``name_without_prefix``.

    Parameters
    ----------
    infile : str
        Path of the ontology file handed to `OntologyNetwork`.
    delimiter : str, optional
        Delimiter forwarded to `OntologyNetwork`. Default ``'/'``.
    """

    def __init__(self, infile, delimiter='/'):
        AttrSetter.__init__(self)
        self.network = OntologyNetwork(infile, delimiter=delimiter)
        # NOTE(review): this calls __init__ on the base IRI value itself, not
        # on `self`. Presumably the intent was to initialise the RDFLib
        # namespace part of this object with the ontology base IRI - confirm.
        RDFLibNamespace.__init__(self.network.onto.tree.base_iri)
        self.name = self.network.onto.tree.name

        # gather every term that belongs to this ontology, keyed by bare name
        mapdict = {
            term.name_without_prefix: term
            for kind in ('class', 'object_property', 'data_property')
            for term in self.network.onto.attributes[kind].values()
            if term.namespace == self.name
        }

        # expose the collected terms as attributes
        self._add_attribute(mapdict)


# Directory containing this module; the ontology files below are resolved
# relative to it.
file_location = os.path.dirname(__file__)

# Namespaces built from the .owl files under data/. Each one is constructed
# at import time from its ontology file.
CMSO = Namespace(os.path.join(file_location, 'data/cmso.owl'))
PLDO = Namespace(os.path.join(file_location, 'data/pldo.owl'))
PODO = Namespace(os.path.join(file_location, 'data/podo.owl'))
ASMO = Namespace(os.path.join(file_location, 'data/asmo.owl'))
# Plain RDFLib namespace created directly from its IRI; no ontology file is
# read for it.
PROV = RDFLibNamespace("http://www.w3.org/ns/prov#")
38 changes: 26 additions & 12 deletions atomrdf/network/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,19 +181,33 @@ def _parse_class_basic(self):
iri = c.iri
#print(iri)
#print(iri)
try:
subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name))
for sb in subclasses:
term = OntoTerm(sb.iri, delimiter=self.delimiter)
term.node_type ='class'
self.attributes['class'][term.name] = term
subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
classes.append(subclasses)
except:
term = OntoTerm(c.iri, delimiter=self.delimiter)
#CHILDREN
children = self.tree.get_children_of(c)
named_instances = self.tree.get_instances_of(c)
equiv_classes = c.equivalent_to
subclasses = [*children, *named_instances, *equiv_classes]
subclasses.append(c)
for sb in subclasses:
term = OntoTerm(sb.iri, delimiter=self.delimiter)
term.node_type ='class'
self.attributes['class'][term.name] = term
classes.append([strip_name(c.iri, self.delimiter)])
self.attributes['class'][term.name] = term
subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
classes.append(subclasses)


#try:
# subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name))
# for sb in subclasses:
# term = OntoTerm(sb.iri, delimiter=self.delimiter)
# term.node_type ='class'
# self.attributes['class'][term.name] = term
# subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
# classes.append(subclasses)
#except:
# term = OntoTerm(c.iri, delimiter=self.delimiter)
# term.node_type ='class'
# self.attributes['class'][term.name] = term
# classes.append([strip_name(c.iri, self.delimiter)])
return classes

def _aggregate_keys(self, dd):
Expand Down
8 changes: 7 additions & 1 deletion atomrdf/network/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,10 @@ def uri(self, val):

@property
def name_without_prefix(self):
    """
    Name of the term without its prefix, with dashes removed.

    Returns
    -------
    str
        Result of ``_get_name(self.uri, self.delimiter)`` with every
        en dash and hyphen stripped out.
    """
    name = _get_name(self.uri, self.delimiter)
    # Strip en dashes and hyphens; presumably so the name can be used as an
    # attribute name - verify against callers.
    return name.replace('–', '').replace('-', '')

@property
def name(self):
Expand Down Expand Up @@ -142,6 +145,9 @@ def query_name_without_prefix(self):
return self.name_without_prefix + "value"
return self.name_without_prefix

def toPython(self):
    """
    Return the Python representation of this term.

    Returns
    -------
    The value of ``self.uri``.
    """
    return self.uri

def __repr__(self):
return str(self.name)

Expand Down
Loading

0 comments on commit 94b3917

Please sign in to comment.