Merge pull request #84 from pyscal/strict_triple_checking

Strict triple checking
pyscal · Apr 16, 2024 · commit 94b3917
2 parents: 9b92da0 + 0fe7669
Show file tree

Hide file tree

Showing 13 changed files with 6,187 additions and 3,640 deletions.
diff --git a/atomrdf/data/asmo.owl b/atomrdf/data/asmo.owl
@@ -15,6 +15,7 @@
         <terms:creator>https://orcid.org/0000-0001-7564-7990</terms:creator>
         <terms:description>ASMO is an ontology that aims to define the concepts needed to describe commonly used atomic scale simulation methods, i.e. density functional theory, molecular dynamics, Monte Carlo methods, etc. ASMO uses the Provenance Ontology (PROV-O) to describe the simulation process.</terms:description>
         <terms:title>Atomistic Simulation Methods Ontology (ASMO)</terms:title>
+        <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.0.1</owl:versionInfo>
     </owl:Ontology>
 
 

diff --git a/atomrdf/graph.py b/atomrdf/graph.py
@@ -4,7 +4,7 @@
 object is stored in triplets.
 """
 
-from rdflib import Graph, Literal, Namespace, XSD, RDF, RDFS, BNode, URIRef, FOAF, SKOS, DCTERMS
+from rdflib import Graph, Literal,  XSD, RDF, RDFS, BNode, URIRef
 
 import os
 import numpy as np
@@ -29,11 +29,7 @@
 from atomrdf.stores import create_store
 import atomrdf.json_io as json_io
 
-
-
-CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/")
-PLDO = Namespace("http://purls.helmholtz-metadaten.de/pldo/")
-PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/")
+from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO
 
 #read element data file
 file_location = os.path.dirname(__file__).split('/')
@@ -112,11 +108,38 @@ def add_structure(self, structure):
         structure.graph = self
         structure.to_graph()
 
+    def _modify_triple(self, triple):
+        modified_triple = []
+        for term in triple:
+            if type(term).__name__ == 'OntoTerm':
+                modified_triple.append(term.namespace_object)
+            else:
+                modified_triple.append(term)
+        return tuple(modified_triple)
+
     def add(self, triple):
-        if str(triple[2].toPython()) != 'None':
-            self.graph.add(triple)
-
+        """
+        Force assumes that you are passing rdflib terms, defined with
+        RDFLib Namespace
+        """
+        modified_triple = self._modify_triple(triple)
+        if str(modified_triple[2].toPython()) != 'None':
+            self.graph.add(modified_triple)
+
 
+    def triples(self, triple):
+        modified_triple = self._modify_triple(triple)
+        return self.graph.triples(modified_triple)
+
+    def value(self, arg1, arg2):
+        modified_double = self._modify_triple((arg1, arg2))
+        return self.graph.value(modified_double[0], modified_double[1])
+
+    def remove(self, triple):
+        modified_triple = self._modify_triple(triple)
+        return self.graph.remove(modified_triple)
+
+
     def _initialize_graph(self):
         """
         Create the RDF Graph from the data stored
@@ -159,19 +182,19 @@ def add_calculated_quantity(self, sample, propertyname, value, unit=None):
 
 
     def inspect_sample(self):
-        natoms = self.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
-        material = list([k[2] for k in self.graph.triples((sample, CMSO.hasMaterial, None))])[0]
-        defects = list([k[2] for k in self.graph.triples((material, CMSO.hasDefect, None))])
-        composition = list([k[2].toPython() for k in self.graph.triples((material, CMSO.hasElementRatio, None))])
-        crystalstructure = self.graph.value(material, CMSO.hasStructure)
-        spacegroupsymbol = self.graph.value(crystalstructure, CMSO.hasSpaceGroupSymbol).toPython()
-
-        lattice = self.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
-        defect_types = list([self.graph.value(d, RDF.type).toPython() for d in defects])
-        prop_nodes = list([k[2] for k in self.graph.triples((sample, CMSO.hasCalculatedProperty, None))])
-        props = list([self.graph.value(prop_node, RDFS.label) for prop_node in prop_nodes])
-        propvals = list([self.graph.value(d, CMSO.hasValue).toPython() for d in prop_nodes])
-        units = list([self.graph.value(d, CMSO.hasUnit).toPython() for d in prop_nodes])
+        natoms = self.value(sample, CMSO.hasNumberOfAtoms).toPython()
+        material = list([k[2] for k in self.triples((sample, CMSO.hasMaterial, None))])[0]
+        defects = list([k[2] for k in self.triples((material, CMSO.hasDefect, None))])
+        composition = list([k[2].toPython() for k in self.triples((material, CMSO.hasElementRatio, None))])
+        crystalstructure = self.value(material, CMSO.hasStructure)
+        spacegroupsymbol = self.value(crystalstructure, CMSO.hasSpaceGroupSymbol).toPython()
+
+        lattice = self.value(sample, CMSO.hasNumberOfAtoms).toPython()
+        defect_types = list([self.value(d, RDF.type).toPython() for d in defects])
+        prop_nodes = list([k[2] for k in self.triples((sample, CMSO.hasCalculatedProperty, None))])
+        props = list([self.value(prop_node, RDFS.label) for prop_node in prop_nodes])
+        propvals = list([self.value(d, CMSO.hasValue).toPython() for d in prop_nodes])
+        units = list([self.value(d, CMSO.hasUnit).toPython() for d in prop_nodes])
         st = []
         st.append(f'Sample with {natoms} atoms.\n')
         st.append("Material:\n")
@@ -330,12 +353,12 @@ def archive(self, package_name, format='turtle', compress=True):
 
         #now go through each sample, and copy the file, at the same time fix the paths
         for sample in self.samples:
-            filepath = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
+            filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
             shutil.copy(filepath, structure_store)
 
             #now we have to remove the old path, and fix new
             for val in ['Position', 'Species']:
-                self.graph.remove((URIRef(f'{sample}_{val}'), CMSO.hasPath, None))
+                self.remove((URIRef(f'{sample}_{val}'), CMSO.hasPath, None))
 
                 #assign corrected path
                 new_relpath = "/".join(['rdf_structure_store', filepath.split('/')[-1]])
@@ -428,20 +451,20 @@ def n_samples(self):
         Number of samples in the Graph
         """
 
-        return len([x for x in self.graph.triples((None, RDF.type, CMSO.AtomicScaleSample))])
+        return len([x for x in self.triples((None, RDF.type, CMSO.AtomicScaleSample))])
 
     @property
     def samples(self):
         """
         Returns a list of all Samples in the graph
         """
 
-        return [x[0] for x in self.graph.triples((None, RDF.type, CMSO.AtomicScaleSample))]
+        return [x[0] for x in self.triples((None, RDF.type, CMSO.AtomicScaleSample))]
 
     def iterate_graph(self, item, create_new_graph=False):
         if create_new_graph:
             self.sgraph = KnowledgeGraph()
-        triples = list(self.graph.triples((item, None, None)))
+        triples = list(self.triples((item, None, None)))
         for triple in triples:
             self.sgraph.graph.add(triple)
             self.iterate_graph(triple[2])
@@ -467,7 +490,7 @@ def get_sample(self, sample, no_atoms=False):
 
         self.iterate_graph(sample, create_new_graph=True)
         if no_atoms:
-            na = self.sgraph.graph.value(sample, CMSO.hasNumberOfAtoms).toPython()
+            na = self.sgraph.value(sample, CMSO.hasNumberOfAtoms).toPython()
             return self.sgraph, na
         return self.sgraph
 
@@ -486,18 +509,18 @@ def get_system_from_sample(self, sample):
             corresponding system
         """
 
-        simcell = self.graph.value(sample, CMSO.hasSimulationCell)
+        simcell = self.value(sample, CMSO.hasSimulationCell)
         cell_vectors = [[], [], []]
 
-        for s in self.graph.triples((simcell, CMSO.hasVector, None)):
-            cell_vectors[0].append(self.graph.value(s[2], CMSO.hasComponent_x).toPython())
-            cell_vectors[1].append(self.graph.value(s[2], CMSO.hasComponent_y).toPython())
-            cell_vectors[2].append(self.graph.value(s[2], CMSO.hasComponent_z).toPython())
+        for s in self.triples((simcell, CMSO.hasVector, None)):
+            cell_vectors[0].append(self.value(s[2], CMSO.hasComponent_x).toPython())
+            cell_vectors[1].append(self.value(s[2], CMSO.hasComponent_y).toPython())
+            cell_vectors[2].append(self.value(s[2], CMSO.hasComponent_z).toPython())
 
         #cell_vectors
-        filepath = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
-        position_identifier = self.graph.value(URIRef(f'{sample}_Position'), CMSO.hasIdentifier).toPython()
-        species_identifier = self.graph.value(URIRef(f'{sample}_Species'), CMSO.hasIdentifier).toPython()
+        filepath = self.value(URIRef(f'{sample}_Position'), CMSO.hasPath).toPython()
+        position_identifier = self.value(URIRef(f'{sample}_Position'), CMSO.hasIdentifier).toPython()
+        species_identifier = self.value(URIRef(f'{sample}_Species'), CMSO.hasIdentifier).toPython()
 
         #open the file for reading
         with open(filepath, 'r') as fin:

diff --git a/atomrdf/namespace.py b/atomrdf/namespace.py
@@ -0,0 +1,35 @@
+import os
+from rdflib import Literal, URIRef
+from rdflib import Namespace as RDFLibNamespace
+from pyscal3.atoms import AttrSetter
+
+from atomrdf.network.network import OntologyNetwork
+
+class Namespace(AttrSetter, RDFLibNamespace):
+    def __init__(self, infile, delimiter='/'):
+        AttrSetter.__init__(self)
+        self.network = OntologyNetwork(infile, delimiter=delimiter)
+        #print(type(self.network.onto.tree.base_iri))
+        #self.namespace = RDFLibNamespace(self.network.onto.tree.base_iri)
+        RDFLibNamespace.__init__(self.network.onto.tree.base_iri)
+        #self.namespace = RDFLibNamespace("http://purls.helmholtz-metadaten.de/cmso/")
+        self.name = self.network.onto.tree.name
+        mapdict = {}
+
+        #now iterate over all attributes
+        for k1 in ['class', 'object_property', 'data_property']:
+            for k2, val in self.network.onto.attributes[k1].items():
+                if val.namespace == self.name:
+                    mapdict[val.name_without_prefix] = val
+
+        #add attributes
+        self._add_attribute(mapdict)
+
+
+file_location = os.path.dirname(__file__)
+
+CMSO = Namespace(os.path.join(file_location,  'data/cmso.owl'))
+PLDO = Namespace(os.path.join(file_location,  'data/pldo.owl'))
+PODO = Namespace(os.path.join(file_location,  'data/podo.owl'))
+ASMO = Namespace(os.path.join(file_location,  'data/asmo.owl'))
+PROV = RDFLibNamespace("http://www.w3.org/ns/prov#")
diff --git a/atomrdf/network/parser.py b/atomrdf/network/parser.py
@@ -181,19 +181,33 @@ def _parse_class_basic(self):
             iri = c.iri
             #print(iri)
             #print(iri)
-            try:
-                subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name))
-                for sb in subclasses:
-                    term = OntoTerm(sb.iri, delimiter=self.delimiter)
-                    term.node_type ='class'
-                    self.attributes['class'][term.name] = term
-                subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
-                classes.append(subclasses)
-            except:
-                term = OntoTerm(c.iri, delimiter=self.delimiter)
+            #CHILDREN
+            children = self.tree.get_children_of(c)
+            named_instances = self.tree.get_instances_of(c)
+            equiv_classes = c.equivalent_to
+            subclasses = [*children, *named_instances, *equiv_classes]
+            subclasses.append(c)
+            for sb in subclasses:
+                term = OntoTerm(sb.iri, delimiter=self.delimiter)
                 term.node_type ='class'
-                self.attributes['class'][term.name] = term                
-                classes.append([strip_name(c.iri, self.delimiter)])
+                self.attributes['class'][term.name] = term
+            subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
+            classes.append(subclasses)
+
+
+            #try:
+            #    subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name))
+            #    for sb in subclasses:
+            #        term = OntoTerm(sb.iri, delimiter=self.delimiter)
+            #        term.node_type ='class'
+            #        self.attributes['class'][term.name] = term
+            #    subclasses = [strip_name(sb.iri, self.delimiter) for sb in subclasses]
+            #    classes.append(subclasses)
+            #except:
+            #    term = OntoTerm(c.iri, delimiter=self.delimiter)
+            #    term.node_type ='class'
+            #    self.attributes['class'][term.name] = term                
+            #    classes.append([strip_name(c.iri, self.delimiter)])
         return classes
 
     def _aggregate_keys(self, dd):

diff --git a/atomrdf/network/term.py b/atomrdf/network/term.py
@@ -91,7 +91,10 @@ def uri(self, val):
 
     @property
     def name_without_prefix(self):
-        return _get_name(self.uri, self.delimiter)
+        name = _get_name(self.uri, self.delimiter)
+        name = name.replace('–', '')
+        name = name.replace('-', '')
+        return name
 
     @property
     def name(self):
@@ -142,6 +145,9 @@ def query_name_without_prefix(self):
             return self.name_without_prefix + "value"
         return self.name_without_prefix
 
+    def toPython(self):
+        return self.uri
+
     def __repr__(self):
         return str(self.name)