From 5ce88f7e6f2444eb8516115ce44e3be8a77054a5 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Fri, 14 Jul 2023 15:29:03 +0200
Subject: [PATCH 01/24] add draft of new parser

---
 pyscal_rdf/parser.py | 176 +++++++++++++++++++++++++++++++++++++++++++
 pyscal_rdf/term.py   |  43 +++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 pyscal_rdf/parser.py
 create mode 100644 pyscal_rdf/term.py

diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py
new file mode 100644
index 0000000..0753c25
--- /dev/null
+++ b/pyscal_rdf/parser.py
@@ -0,0 +1,176 @@
+from onto_network.term import OntoTerm 
+from owlready2 import get_ontology
+
+import os
+import copy
+import numpy as np
+import itertools
+
+class OntoParser:
+    def __init__(self, infile, delimiter='/'):
+        if os.path.exists(infile):
+            self.tree = get_ontology(f'file://{infile}').load()
+        else:
+            raise FileNotFoundError(f'file {infile} not found!')
+        self.attributes = {}
+        self.attributes['class'] = {}
+        self.attributes['object_property'] = {}
+        self.attributes['data_property'] = {}        
+        self.delimiter = delimiter
+        self._parse_class()
+        self._parse_object_property()
+        self._parse_data_property()        
+    
+    def _strip_name(self, uri):
+        uri_split = uri.split(self.delimiter)
+        if len(uri_split)>1:
+            return ":".join(uri_split[-2:])
+        else:
+            return uri
+    
+    def _strip_datatype(self, uri, delimiter='#'):
+        uri_split = uri.split(delimiter)
+        return uri_split[-1]        
+        
+    def _dict_to_lst(self, d):
+        return [val for key, val in d.items()]
+    
+    def _get_subclasses(self, name):
+        arg = self._in_which_bin_is_class(name)
+        if arg is not None:
+            return self.classes[arg]
+        else:
+            return [name]
+                
+    def _parse_data_property(self):
+        """Register each data property as an OntoTerm and record it on its domain classes."""
+        for c in self.tree.data_properties():
+            iri = c.iri
+            dm = c.domain
+            try:
+                dm = [self._strip_name(d.iri) for d in dm[0].Classes]
+            except (AttributeError, IndexError):
+                dm = [self._strip_name(d.iri) for d in dm]
+            #now get subclasses
+            dm = [self._get_subclasses(d) for d in dm]
+            dm = list(itertools.chain(*dm))
+
+            rn = c.range
+            try:
+                rn = [r.__name__ for r in rn[0].Classes if r is not None]
+            except (AttributeError, IndexError):
+                rn = [r.__name__ for r in rn if r is not None]
+            term = OntoTerm(iri)
+            term.domain = dm
+            term.range = rn
+            term.node_type = 'data_property'
+            self.attributes['data_property'][term.name] = term
+            #a class listed in the domain is a domain of this property
+            for d in dm:
+                if d!='07:owl#Thing':
+                    self.attributes['class'][d].is_domain_of.append(term.name)
+            
+    def _parse_object_property(self):
+        """Register each object property as an OntoTerm and record it on its domain/range classes."""
+        for c in self.tree.object_properties():
+            iri = c.iri
+            dm = c.domain
+            try:
+                dm = [self._strip_name(d.iri) for d in dm[0].Classes]
+            except (AttributeError, IndexError):
+                dm = [self._strip_name(d.iri) for d in dm]
+            #now get subclasses
+            dm = [self._get_subclasses(d) for d in dm]
+            dm = list(itertools.chain(*dm))
+
+            rn = c.range
+            try:
+                rn = [self._strip_name(r.iri) for r in rn[0].Classes]
+            except (AttributeError, IndexError):
+                rn = [self._strip_name(r.iri) for r in rn]
+            #now get subclasses
+            rn = [self._get_subclasses(d) for d in rn]
+            rn = list(itertools.chain(*rn))
+
+            term = OntoTerm(iri)
+            term.domain = dm
+            term.range = rn
+            term.node_type = 'object_property'
+            self.attributes['object_property'][term.name] = term
+            #domain classes get is_domain_of, range classes get is_range_of
+            for d in dm:
+                if d!='07:owl#Thing':
+                    self.attributes['class'][d].is_domain_of.append(term.name)
+            for r in rn:
+                if r!='07:owl#Thing':
+                    self.attributes['class'][r].is_range_of.append(term.name)
+                
+    def _parse_class_basic(self):
+        classes = []
+        for c in self.tree.classes():
+            iri = c.iri
+            #print(iri)
+            try:
+                subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name))
+                for sb in subclasses:
+                    term = OntoTerm(sb.iri)
+                    term.node_type ='class'
+                    self.attributes['class'][term.name] = term
+                subclasses = [self._strip_name(sb.iri) for sb in subclasses]
+                classes.append(subclasses)
+            except:
+                term = OntoTerm(c.iri)
+                term.node_type ='class'
+                self.attributes['class'][term.name] = term                
+                classes.append([self._strip_name(c.iri)])
+        return classes
+    
+    def _aggregate_keys(self, dd):
+        """
+        Merge lists that share at least one element; return one unique array per group.
+        """
+        large_list = []
+        #an empty input has nothing to merge (indexing lst[0] used to raise IndexError)
+        if len(dd)==0:
+            return large_list
+        lst = copy.deepcopy(dd)
+        #choose the first list and delete it from the main list
+        start = lst.pop(0)
+        #now loop, if there is intersection add to this list
+        while True:
+            found = False
+            index_to_delete = []
+            for count, ls in enumerate(lst):
+                if set(start) & set(ls):
+                    #common elements found! merge them
+                    start.extend(ls)
+                    found = True
+                    index_to_delete.append(count)
+            if found:
+                #delete from the back so the remaining indices stay valid
+                for ii in index_to_delete[::-1]:
+                    del lst[ii]
+            else:
+                #a full pass without a merge means this group is complete
+                large_list.append(np.unique(start))
+                if len(lst)==0:
+                    break
+                start = lst.pop(0)
+        return large_list
+            
+    def _parse_class(self):
+        sub_classes = self._parse_class_basic()
+        #now we have to go through and clean up sub classes
+        sub_classes = self._aggregate_keys(sub_classes)
+        self.classes = sub_classes
+        
+    def _in_which_bin_is_class(self, name):
+        for count, lst in enumerate(self.classes):
+            if name in lst:
+                return count
+        else:
+            return None
+    
+            
+            
+        
\ No newline at end of file
diff --git a/pyscal_rdf/term.py b/pyscal_rdf/term.py
new file mode 100644
index 0000000..53e5515
--- /dev/null
+++ b/pyscal_rdf/term.py
@@ -0,0 +1,43 @@
+
+class OntoTerm:
+    def __init__(self, uri, node_type=None,
+                dm=None, rn=None, data_type=None,
+                 node_id=None, delimiter='/'):
+        """
+        Represent one ontology element: a class, an object property or a data property.
+        """
+        self.uri = uri
+        #name of the class
+        self._name = None
+        #type: can be object property, data property, or class
+        self.node_type = node_type
+        #domain and range; a fresh list per instance so terms never share a default list
+        self.domain = [] if dm is None else dm
+        self.range = [] if rn is None else rn
+        #datatype, that is only needed for data properties
+        self.data_type = data_type
+        #identifier
+        self.node_id = node_id
+        self.subclasses = []
+        self.delimiter = delimiter
+        self.is_domain_of = []
+        self.is_range_of = []
+
+    @property
+    def uri(self):
+        return self._uri
+
+    @uri.setter
+    def uri(self, val):
+        self._uri = val
+
+    @property
+    def name(self):
+        """Last two delimiter-separated parts of the uri joined by ':', else the uri itself."""
+        uri_split = self.uri.split(self.delimiter)
+        if len(uri_split)>1:
+            return ":".join(uri_split[-2:])
+        return self.uri
+
+    def __repr__(self):
+        return str(self.name)
\ No newline at end of file

From 217b32d81ff338365ed3eaa2e20b85a4d89c45f8 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Fri, 14 Jul 2023 15:33:48 +0200
Subject: [PATCH 02/24] update network

---
 pyscal_rdf/network.py | 144 ++++++++++++++++++------------------------
 1 file changed, 60 insertions(+), 84 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 1afe09d..131f390 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -1,99 +1,75 @@
 import networkx as nx
+import graphviz
 import matplotlib.pyplot as plt
 import numpy as np
+import os
+from pyscal_rdf.parser import OntoParser
 
-class Network:
+owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl")
+
+def _replace_name(name):
+    return ".".join(name.split(':'))
+
+class OntologyNetwork:
     """
-    Network representation of CMSO
+    Network representation of Onto
     """
-    def __init__(self):
+    def __init__(self, infile=None):
+        if infile is None:
+            infile = owlfile
+            
         self.g = nx.DiGraph()
-    
-    def add(self, sub, pred, obj, dtype=None, pred_prefix="cmso"):
-        pred = f'{pred_prefix}:{pred}'
-        self.g.add_node(sub, node_type="object")
-        self.g.add_node(pred, node_type="property")
-        if dtype is not None:
-            nd = "data"
-        else:
-            nd = "object"
-        self.g.add_node(obj, dtype=dtype, node_type=nd)            
-        self.g.add_edge(sub, pred)
-        self.g.add_edge(pred, obj)
-    
-    def draw(self):
-        nx.draw(self.g, with_labels=True, font_weight='bold')
+        self.onto = OntoParser(infile)
+        self.data_prefix = 'value'
         
+        #call methods
+        self._add_class_nodes()
+        self._add_object_properties()
+        self._add_data_properties()
+                
     def get_shortest_path(self, source, target):
         path = nx.shortest_path(self.g, source=source, target=target)
         return path
     
-class OntologyNetwork(Network):
-    def __init__(self):
-        super().__init__()
-        self.add("Sample", "hasMaterial", "Material")
-        self.add("Material", "hasElementRatio", "ElementRatio", dtype="string")
-
-        self.add("Sample", "hasSimulationCell", "SimulationCell")
-        self.add("SimulationCell", "hasVolume", "Volume", dtype="float")
-        self.add("Sample", "hasNumberOfAtoms", "NumberOfAtoms", dtype="integer")
-
-        self.add("SimulationCell", "hasLength", "SimulationCellLength")
-        self.add("SimulationCellLength", "hasLength_x", "SimulationCellLength_x", dtype="float")
-        self.add("SimulationCellLength", "hasLength_y", "SimulationCellLength_y", dtype="float")
-        self.add("SimulationCellLength", "hasLength_z", "SimulationCellLength_z", dtype="float")
-
-        self.add("SimulationCell", "hasVector", "SimulationCellVectorA")
-        self.add("SimulationCellVectorA", "hasComponent_x", "SimulationCellVectorA_x", dtype="float")
-        self.add("SimulationCellVectorA", "hasComponent_y", "SimulationCellVectorA_y", dtype="float")
-        self.add("SimulationCellVectorA", "hasComponent_z", "SimulationCellVectorA_z", dtype="float")
-        self.add("SimulationCell", "hasVector", "SimulationCellVectorB")
-        self.add("SimulationCellVectorB", "hasComponent_x", "SimulationCellVectorB_x", dtype="float")
-        self.add("SimulationCellVectorB", "hasComponent_y", "SimulationCellVectorB_y", dtype="float")
-        self.add("SimulationCellVectorB", "hasComponent_z", "SimulationCellVectorB_z", dtype="float")
-        self.add("SimulationCell", "hasVector", "SimulationCellVectorC")
-        self.add("SimulationCellVectorC", "hasComponent_x", "SimulationCellVectorC_x", dtype="float")
-        self.add("SimulationCellVectorC", "hasComponent_y", "SimulationCellVectorC_y", dtype="float")
-        self.add("SimulationCellVectorC", "hasComponent_z", "SimulationCellVectorC_z", dtype="float")
-
-        self.add("SimulationCell", "hasAngle", "SimulationCellAngle")
-        self.add("SimulationCellAngle", "hasAngle_alpha", "SimulationCellAngle_alpha", dtype="float")
-        self.add("SimulationCellAngle", "hasAngle_beta", "SimulationCellAngle_beta", dtype="float")
-        self.add("SimulationCellAngle", "hasAngle_gamma", "SimulationCellAngle_gamma", dtype="float")
-
-        self.add("Material", "hasStructure", "CrystalStructure")
-        self.add("CrystalStructure", "hasAltName", "CrystalStructureAltName", dtype="string")
-        self.add("CrystalStructure", "hasSpaceGroupSymbol", "SpaceGroupSymbol", dtype="string")
-        self.add("CrystalStructure", "hasSpaceGroupNumber", "SpaceGroupNumber", dtype="integer")
-
-        self.add("CrystalStructure", "hasUnitCell", "UnitCell")
-        self.add("UnitCell", "hasBravaisLattice", "LatticeSystem")
-        self.add("UnitCell", "hasLatticeParameter", "LatticeParameter")
-        self.add("LatticeParameter", "hasLength_x", "LatticeParameter_x", dtype="float")
-        self.add("LatticeParameter", "hasLength_y", "LatticeParameter_y", dtype="float")
-        self.add("LatticeParameter", "hasLength_z", "LatticeParameter_z", dtype="float")
-        self.add("UnitCell", "hasAngle", "LatticeAngle")
-        self.add("LatticeAngle", "hasAngle_alpha", "LatticeAngle_alpha", dtype="float")
-        self.add("LatticeAngle", "hasAngle_beta", "LatticeAngle_beta", dtype="float")
-        self.add("LatticeAngle", "hasAngle_gamma", "LatticeAngle_gamma", dtype="float")
-
-        #add GB properties
-        self.add("Material", "hasDefect", "Defect", pred_prefix="cmso")
-        self.add("Defect", "type", "GrainBoundary", pred_prefix="rdf")
-        self.add("Defect", "type", "TwistBoundary", pred_prefix="rdf")
-        self.add("Defect", "type", "TiltBoundary", pred_prefix="rdf")
-        self.add("Defect", "type", "SymmetricTiltBoundary", pred_prefix="rdf")
-        self.add("Defect", "type", "MixedBoundary", pred_prefix="rdf")
-        self.add("Defect", "hasSigmaValue", "Sigma", dtype="integer", pred_prefix="pldo")
-        self.add("Defect", "hasGBPlane", "GBPlane", pred_prefix="pldo", dtype="string")
-        self.add("Defect", "hasRotationAxis", "RotationAxis", pred_prefix="pldo", dtype="string")
-        self.add("Defect", "hasMisorientationAngle", "MisorientationAngle", pred_prefix="pldo", dtype="float")
-
-        #add vacancy
-        self.add("Defect", "type", "Vacancy", pred_prefix="rdf")
-        self.add("SimulationCell", "hasVacancyConcentration", "VacancyConcentration", pred_prefix="podo", dtype="float")
-        self.add("SimulationCell", "hasNumberOfVacancies", "NumberOfVacancy", pred_prefix="podo", dtype="integer")
-         
+    def _add_class_nodes(self):
+        for key, val in self.onto.attributes['class'].items():
+            self.g.add_node(val.name, node_type='class')
+    
+    def _add_object_properties(self):
+        for key, val in self.onto.attributes['object_property'].items():
+            self.g.add_node(val.name, node_type='object_property')
+            #find domain
+            for d in val.domain:
+                self.g.add_edge(d, val.name)
+            for r in val.range:
+                self.g.add_edge(val.name, r)
+    
+    def _add_data_properties(self):
+        for key, val in self.onto.attributes['data_property'].items():
+            self.g.add_node(val.name, node_type='data_property')
+            for d in val.domain:
+                self.g.add_edge(d, val.name)
+            for r in val.range:
+                data_node = f'{val.name}{self.data_prefix}'
+                self.g.add_node(data_node, node_type='literal')
+                self.g.add_edge(val.name, data_node)
+                
+    
+    def draw(self, styledict = {"class": {"shape":"box"},
+                                "object_property": {"shape":"ellipse"},
+                                "data_property": {"shape":"ellipse"},
+                                "literal": {"shape":"parallelogram"},}):
+        dot = graphviz.Digraph()
+        node_list = list(self.g.nodes(data='node_type'))
+        edge_list = list(self.g.edges)
+        for node in node_list:
+            name = _replace_name(node[0])
+            if node[1] is not None:
+                t = node[1]
+                dot.node(name, shape=styledict[t]['shape'], fontsize="6")
+        for edge in edge_list:
+            dot.edge(_replace_name(edge[0]), _replace_name(edge[1]))
+        return dot
 
     def get_path_from_sample(self, target):
         path = self.get_shortest_path(source="Sample", target=target)

From 36995f4982938be66ae42140dfa793499f634fd9 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 12:58:09 +0200
Subject: [PATCH 03/24] add add method for parser

---
 pyscal_rdf/network.py |  3 +++
 pyscal_rdf/parser.py  | 26 ++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 131f390..9f754d8 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -26,6 +26,9 @@ def __init__(self, infile=None):
         self._add_class_nodes()
         self._add_object_properties()
         self._add_data_properties()
+
+    def __add__(self, ontonetwork):
+        pass
                 
     def get_shortest_path(self, source, target):
         path = nx.shortest_path(self.g, source=source, target=target)
diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py
index 0753c25..3e777ff 100644
--- a/pyscal_rdf/parser.py
+++ b/pyscal_rdf/parser.py
@@ -17,10 +17,32 @@ def __init__(self, infile, delimiter='/'):
         self.attributes['object_property'] = {}
         self.attributes['data_property'] = {}        
         self.delimiter = delimiter
+        self.classes = None
         self._parse_class()
         self._parse_object_property()
-        self._parse_data_property()        
-    
+        self._parse_data_property()
+
+    def __add__(self, ontoparser):
+        """
+        Add method; in principle it should add-
+        - classes
+        - attributes dict
+        """
+        for mainkey in ['class', 'object_property', 'data_property']:
+            if mainkey in ontoparser.attributes.keys():
+                for key, val in ontoparser.attributes[mainkey].items():
+                    self.attributes[mainkey][key] = val        
+        
+        #now change classes
+        if ontoparser.classes is not None:
+            for clx in ontoparser.classes:
+                self.classes.append(clx)
+
+        return self
+
+    def __radd__(self, ontoparser):
+        return self.__add__(ontoparser)
+
     def _strip_name(self, uri):
         uri_split = uri.split(self.delimiter)
         if len(uri_split)>1:

From 3c9018e2591b5209199493db208112503a571954 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:07:14 +0200
Subject: [PATCH 04/24] add add method for ontonetwork

---
 pyscal_rdf/network.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 9f754d8..7a2c842 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -22,14 +22,21 @@ def __init__(self, infile=None):
         self.onto = OntoParser(infile)
         self.data_prefix = 'value'
         
+    def _parse_all(self):
         #call methods
         self._add_class_nodes()
         self._add_object_properties()
         self._add_data_properties()
 
     def __add__(self, ontonetwork):
-        pass
-                
+        #add onto network
+        self.onto = self.onto + ontonetwork.onto
+        #now parse again
+        self._parse_all()
+
+    def __radd__(self, ontonetwork):
+        return self.__add__(ontonetwork)
+
     def get_shortest_path(self, source, target):
         path = nx.shortest_path(self.g, source=source, target=target)
         return path

From 51ecb3b5fad9d6fac259bff2466153c688e69ed6 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:13:17 +0200
Subject: [PATCH 05/24] fix bug in add

---
 pyscal_rdf/network.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 7a2c842..2c02bcf 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -33,6 +33,7 @@ def __add__(self, ontonetwork):
         self.onto = self.onto + ontonetwork.onto
         #now parse again
         self._parse_all()
+        return self
 
     def __radd__(self, ontonetwork):
         return self.__add__(ontonetwork)

From 8421051584601cf12e23dc547b3cab970a387383 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:14:07 +0200
Subject: [PATCH 06/24] add overload of attributes

---
 pyscal_rdf/network.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 2c02bcf..bc361b4 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -35,6 +35,9 @@ def __add__(self, ontonetwork):
         self._parse_all()
         return self
 
+    def attributes(self):
+        return self.onto.attributes
+
     def __radd__(self, ontonetwork):
         return self.__add__(ontonetwork)
 

From 1cf4c99bcba367a889a246b83bd4829b2fa7a700 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:26:18 +0200
Subject: [PATCH 07/24] add possibility to add path

---
 pyscal_rdf/network.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index bc361b4..c739062 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -69,6 +69,29 @@ def _add_data_properties(self):
                 self.g.add_edge(val.name, data_node)
                 
     
+    def add_path(self, triple):
+        """
+        Add a triple as path. Note that all attributes of the triple should already
+        exist in the graph. The ontology itself is not modified. Only the graph
+        representation of it is.
+        The expected use is to bridge between two(or more) different ontologies.
+        Therefore, mapping can only be between classes.
+        """
+        sub = triple[0]
+        pred = triple[1]
+        obj = triple[2]
+
+        for item in [sub, obj]:
+            if not item in self.attributes['class'].keys():
+                raise ValueError(f'{item} not found in self.attributes')
+        
+        if pred not in self.attributesp['object_property'].keys():
+            raise ValueError(f'{pred} not found in self.attributes')
+
+        #now add
+        self.g.add_edge(sub, pred)
+        self.g.add_edge(pred, obj) 
+
     def draw(self, styledict = {"class": {"shape":"box"},
                                 "object_property": {"shape":"ellipse"},
                                 "data_property": {"shape":"ellipse"},

From c69229bf6d1ad283182b1bed61092708a60b9207 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:37:45 +0200
Subject: [PATCH 08/24] map subclasses in add_path

---
 pyscal_rdf/network.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index c739062..26abd19 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -89,8 +89,13 @@ def add_path(self, triple):
             raise ValueError(f'{pred} not found in self.attributes')
 
         #now add
-        self.g.add_edge(sub, pred)
-        self.g.add_edge(pred, obj) 
+        subclasses = self.onto._get_subclasses(sub)
+        for subclass in subclasses:
+            self.g.add_edge(subclass, pred)
+
+        subclasses = self.onto._get_subclasses(obj)
+        for subclass in subclasses:
+            self.g.add_edge(pred, subclass) 
 
     def draw(self, styledict = {"class": {"shape":"box"},
                                 "object_property": {"shape":"ellipse"},

From 3ce4928c8d10a74a333fd2aaeba2c1b21434b22b Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 13:57:22 +0200
Subject: [PATCH 09/24] add possibilities to add data nodes

---
 pyscal_rdf/network.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 26abd19..ca83e71 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -81,21 +81,30 @@ def add_path(self, triple):
         pred = triple[1]
         obj = triple[2]
 
-        for item in [sub, obj]:
-            if not item in self.attributes['class'].keys():
-                raise ValueError(f'{item} not found in self.attributes')
-        
-        if pred not in self.attributesp['object_property'].keys():
-            raise ValueError(f'{pred} not found in self.attributes')
+        if sub not in self.onto.attributes['class'].keys():
+            raise ValueError(f'{sub} not found in self.attributes')
 
         #now add
         subclasses = self.onto._get_subclasses(sub)
         for subclass in subclasses:
-            self.g.add_edge(subclass, pred)
+            self.g.add_edge(subclass, pred)            
+        
+        #now add pred
+        if pred in self.onto.attributes['object_property'].keys():
+            if obj not in self.onto.attributes['class'].keys():
+                raise ValueError(f'{obj} not found in self.attributes')
+            subclasses = self.onto._get_subclasses(obj)
+            for subclass in subclasses:
+                self.g.add_edge(pred, subclass) 
 
-        subclasses = self.onto._get_subclasses(obj)
-        for subclass in subclasses:
-            self.g.add_edge(pred, subclass) 
+        #another possibility it is data property
+        elif pred in self.onto.attributes['data_property'].keys():
+            data_node = f'{pred}{self.data_prefix}'
+            self.g.add_node(data_node, node_type='literal')
+            self.g.add_edge(pred, data_node)
+        
+        else:
+            raise ValueError(f'{pred} not found in self.attributes')
 
     def draw(self, styledict = {"class": {"shape":"box"},
                                 "object_property": {"shape":"ellipse"},

From 044aa7343051092462366fd722c0b564fcca2256 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 14:46:47 +0200
Subject: [PATCH 10/24] bug fix in draw

---
 pyscal_rdf/network.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index ca83e71..e260169 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -21,6 +21,7 @@ def __init__(self, infile=None):
         self.g = nx.DiGraph()
         self.onto = OntoParser(infile)
         self.data_prefix = 'value'
+        self._parse_all()
         
     def _parse_all(self):
         #call methods
@@ -35,6 +36,7 @@ def __add__(self, ontonetwork):
         self._parse_all()
         return self
 
+    @property
     def attributes(self):
         return self.onto.attributes
 

From cd593afb9881b70e9b624ba8954867ae39f76ebd Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 14:53:50 +0200
Subject: [PATCH 11/24] add datatype

---
 pyscal_rdf/network.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index e260169..cbe46bf 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -67,8 +67,9 @@ def _add_data_properties(self):
                 self.g.add_edge(d, val.name)
             for r in val.range:
                 data_node = f'{val.name}{self.data_prefix}'
-                self.g.add_node(data_node, node_type='literal')
+                self.g.add_node(data_node, node_type='literal', data_type=r.name)
                 self.g.add_edge(val.name, data_node)
+
                 
     
     def add_path(self, triple):

From 0b3b2133e2e939c726001f802d343aa89cbac3c7 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 14:58:48 +0200
Subject: [PATCH 12/24] add possibility to add datatype

---
 pyscal_rdf/network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index cbe46bf..4f8977a 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -103,7 +103,7 @@ def add_path(self, triple):
         #another possibility it is data property
         elif pred in self.onto.attributes['data_property'].keys():
             data_node = f'{pred}{self.data_prefix}'
-            self.g.add_node(data_node, node_type='literal')
+            self.g.add_node(data_node, node_type='literal', data_type=obj)
             self.g.add_edge(pred, data_node)
         
         else:

From 222bd00c42d9e2dda9ab750011fe5bd9bcbc5b73 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 14:59:49 +0200
Subject: [PATCH 13/24] bug fix in datatype

---
 pyscal_rdf/network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 4f8977a..80b2030 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -67,7 +67,7 @@ def _add_data_properties(self):
                 self.g.add_edge(d, val.name)
             for r in val.range:
                 data_node = f'{val.name}{self.data_prefix}'
-                self.g.add_node(data_node, node_type='literal', data_type=r.name)
+                self.g.add_node(data_node, node_type='literal', data_type=r)
                 self.g.add_edge(val.name, data_node)
 
                 

From 064c25409d943037bdc5893cfb1870a196d08417 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 15:34:05 +0200
Subject: [PATCH 14/24] add namespace in the parser

---
 pyscal_rdf/parser.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py
index 3e777ff..d360052 100644
--- a/pyscal_rdf/parser.py
+++ b/pyscal_rdf/parser.py
@@ -18,6 +18,7 @@ def __init__(self, infile, delimiter='/'):
         self.attributes['data_property'] = {}        
         self.delimiter = delimiter
         self.classes = None
+        self.namespaces = [self.tree.base_iri]
         self._parse_class()
         self._parse_object_property()
         self._parse_data_property()
@@ -38,6 +39,9 @@ def __add__(self, ontoparser):
             for clx in ontoparser.classes:
                 self.classes.append(clx)
 
+        for ns in ontoparser.namespaces:
+            self.namespaces.append(ns)
+
         return self
 
     def __radd__(self, ontoparser):

From c81ef2a1aa0d84bd9f44035b7d721ae39e7afd1d Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 15:39:52 +0200
Subject: [PATCH 15/24] convert namespace to dict

---
 pyscal_rdf/network.py |  8 ++++++++
 pyscal_rdf/parser.py  | 11 ++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py
index 80b2030..f9e1a78 100644
--- a/pyscal_rdf/network.py
+++ b/pyscal_rdf/network.py
@@ -40,6 +40,14 @@ def __add__(self, ontonetwork):
     def attributes(self):
         return self.onto.attributes
 
+    @property
+    def namespaces(self):
+        return self.onto.namespaces
+
+    @property
+    def extra_namespaces(self):
+        return self.onto.extra_namespaces
+
     def __radd__(self, ontonetwork):
         return self.__add__(ontonetwork)
 
diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py
index d360052..98391c2 100644
--- a/pyscal_rdf/parser.py
+++ b/pyscal_rdf/parser.py
@@ -18,7 +18,9 @@ def __init__(self, infile, delimiter='/'):
         self.attributes['data_property'] = {}        
         self.delimiter = delimiter
         self.classes = None
-        self.namespaces = [self.tree.base_iri]
+        self.namespaces = {self.tree.name: self.tree.base_iri}
+        self.extra_namespaces = {'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+        }
         self._parse_class()
         self._parse_object_property()
         self._parse_data_property()
@@ -39,8 +41,11 @@ def __add__(self, ontoparser):
             for clx in ontoparser.classes:
                 self.classes.append(clx)
 
-        for ns in ontoparser.namespaces:
-            self.namespaces.append(ns)
+        for key, val in ontoparser.namespaces.items():
+            self.namespaces[key] = val
+
+        for key, val in ontoparser.extra_namespaces.items():
+            self.extra_namespaces[key] = val
 
         return self
 

From 0f764b4a9941f1202d7f728ea4847ad60c99586b Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 15:43:37 +0200
Subject: [PATCH 16/24] restructure network code

---
 pyscal_rdf/network/__init__.py      | 0
 pyscal_rdf/{ => network}/network.py | 0
 pyscal_rdf/{ => network}/parser.py  | 0
 pyscal_rdf/{ => network}/term.py    | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 pyscal_rdf/network/__init__.py
 rename pyscal_rdf/{ => network}/network.py (100%)
 rename pyscal_rdf/{ => network}/parser.py (100%)
 rename pyscal_rdf/{ => network}/term.py (100%)

diff --git a/pyscal_rdf/network/__init__.py b/pyscal_rdf/network/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network/network.py
similarity index 100%
rename from pyscal_rdf/network.py
rename to pyscal_rdf/network/network.py
diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/network/parser.py
similarity index 100%
rename from pyscal_rdf/parser.py
rename to pyscal_rdf/network/parser.py
diff --git a/pyscal_rdf/term.py b/pyscal_rdf/network/term.py
similarity index 100%
rename from pyscal_rdf/term.py
rename to pyscal_rdf/network/term.py

From 1528b4a23e73c95622f2c032842e0c936ed314a6 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 15:45:07 +0200
Subject: [PATCH 17/24] fix imports

---
 pyscal_rdf/graph.py           | 2 +-
 pyscal_rdf/network/network.py | 2 +-
 pyscal_rdf/network/parser.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py
index 1d8cb4e..61f054c 100644
--- a/pyscal_rdf/graph.py
+++ b/pyscal_rdf/graph.py
@@ -15,7 +15,7 @@
 import pandas as pd
 
 from pyscal_rdf.visualize import visualize_graph
-from pyscal_rdf.network import OntologyNetwork
+from pyscal_rdf.network.network import OntologyNetwork
 from pyscal_rdf.rdfsystem import System
 import pyscal_rdf.properties as prp
 #from pyscal3.core import System
diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index f9e1a78..a83b1e3 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -3,7 +3,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import os
-from pyscal_rdf.parser import OntoParser
+from pyscal_rdf.network.parser import OntoParser
 
 owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl")
 
diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py
index 98391c2..2af5442 100644
--- a/pyscal_rdf/network/parser.py
+++ b/pyscal_rdf/network/parser.py
@@ -1,4 +1,4 @@
-from onto_network.term import OntoTerm 
+from pyscal_rdf.network.term import OntoTerm 
 from owlready2 import get_ontology
 
 import os

From 7744e9691a00a37fb2c7157e92d3ca1f1e1651fa Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 15:59:02 +0200
Subject: [PATCH 18/24] return triple directly from path

---
 pyscal_rdf/network/network.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index a83b1e3..9f519f1 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -51,8 +51,13 @@ def extra_namespaces(self):
     def __radd__(self, ontonetwork):
         return self.__add__(ontonetwork)
 
-    def get_shortest_path(self, source, target):
+    def get_shortest_path(self, source, target, triples=False):
         path = nx.shortest_path(self.g, source=source, target=target)
+        if triples:
+            triple_list = []
+            for x in range(len(path)//2):
+                triple_list.append(path[2*x:2*x+3])
+            return triple_list
         return path
     
     def _add_class_nodes(self):

From cf76df4e56848c6c90c5625373bbd48a85c49f35 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 16:26:09 +0200
Subject: [PATCH 19/24] add strip name

---
 pyscal_rdf/network/network.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index 9f519f1..71fcaa7 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -36,6 +36,12 @@ def __add__(self, ontonetwork):
         self._parse_all()
         return self
 
+    def strip_name(self, name):
+        raw = name.split(':')
+        if len(raw) > 1:
+            return raw[-1]
+        return name
+
     @property
     def attributes(self):
         return self.onto.attributes

From be94d433243ff9ec9ba092a1b7ceea5f9ccb471f Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 23:36:02 +0200
Subject: [PATCH 20/24] add draft of new query

---
 pyscal_rdf/graph.py           |   2 +-
 pyscal_rdf/network/network.py | 185 ++++++++++++++++++----------------
 2 files changed, 97 insertions(+), 90 deletions(-)

diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py
index 61f054c..3401f35 100644
--- a/pyscal_rdf/graph.py
+++ b/pyscal_rdf/graph.py
@@ -93,7 +93,7 @@ def __init__(self, graph_file=None,
         self.material = None
         self.sysdict = None
         self.sgraph = None
-        self._query_graph = OntologyNetwork()
+        #self._query_graph = OntologyNetwork()
         self._atom_ids = None
     
     def process_structure(self, structure, format=None):
diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index 71fcaa7..135c889 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -5,7 +5,7 @@
 import os
 from pyscal_rdf.network.parser import OntoParser
 
-owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl")
+owlfile = os.path.join(os.path.dirname(__file__), "../data/cmso.owl")
 
 def _replace_name(name):
     return ".".join(name.split(':'))
@@ -151,97 +151,104 @@ def get_path_from_sample(self, target):
             triplets.append(path[2*x:2*x+3])
         return triplets
         
-    def formulate_query(self, target, value):
-        #first get triplets
-        triplets = self.get_path_from_sample(target)
-        #start building query
-        query = self._formulate_query_path(triplets)
-        query.append(self._formulate_filter_expression(triplets, value))
-        query.append("}")
-        query = " ".join(query)
-        return query
+    def phrase_to_sparql(self, phrase):
+        def _extract_operation(phr):
+            r = phr.split(' ')
+            if len(r) != 3:
+                raise ValueError('wrong filters!')
+            return f'?value{r[1]}\"{r[2]}\"^^xsd:datatype'
+
+        conditions = []
+        operation = None
         
-    
-    def _formulate_query_path(self, triplets):
+        raw = phrase.split(' and ')
+        
+        if len(raw) > 1:
+            operation = '&&'
+        if operation is None:
+            raw = phrase.split(' or ')
+            if len(raw) > 1:
+                operation = '||'
+        
+        if operation is not None:
+            for ph in raw:
+                conditions.append(_extract_operation(ph))
+        else:
+            conditions.append(_extract_operation(phrase))
+        full_str = f' {operation} '.join(conditions)
+        #replace values
+        return full_str
+        
+
+    def validate_values(self, destinations, values):
+        combinator_dict = {'and': '&&', 'or': '||'}
+        combinator_list = values[1::2]
+        phrase_list = values[::2]
+        if not len(combinator_list) == len(destinations)-1:
+            raise ValueError("Invalid combinations!")
+        
+        sparql_phrase_list = []
+        for phrase, destination in zip(phrase_list, destinations):
+            sparql_phrase = phrase_to_sparql(phrase)
+            sparql_phrase = sparql_phrase.replace('value', self.strip_name(destination))
+            sparql_phrase = sparql_phrase.replace('datatype', self.g.nodes[destination]['data_type'])
+            sparql_phrase_list.append(sparql_phrase)
+            
+        #combine phrases with phrase list
+        updated_sparql_phrase_list = []
+        for count, sparql_phrase in enumerate(sparql_phrase_list):
+            updated_sparql_phrase_list.append(f'({sparql_phrase})')
+            if count < len(sparql_phrase_list)-1:
+                updated_sparql_phrase_list.append(combinator_dict[combinator_list[count]])
+            
+        full_filter = " ".join(updated_sparql_phrase_list)
+        return f'FILTER ({full_filter})'
+        
+    def create_query(self, source, destinations, values = None):
+        """
+        values is a dict with keys value, operation
+        """
+        if not isinstance(destinations, list):
+            destinations = [destinations]
+            
+        #start prefix of query
         query = []
-        query.append("PREFIX cmso: <https://purls.helmholtz-metadaten.de/cmso/>")
-        query.append("PREFIX pldo: <https://purls.helmholtz-metadaten.de/pldo/>")
-        query.append("PREFIX podo: <https://purls.helmholtz-metadaten.de/podo/>")
-        query.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-        query.append("SELECT DISTINCT ?sample")
+        for key, val in self.namespaces.items():
+            query.append(f'PREFIX {key}: <{val}>')
+        for key, val in self.extra_namespaces.items():
+            query.append(f'PREFIX {key}: <{val}>')
+        
+        #now for each destination, start adding the paths in the query
+        all_triplets = {}
+        for destination in destinations:
+            triplets = self.get_shortest_path(source, destination, triples=True)
+            all_triplets[destination] = triplets
+        
+        select_destinations = [f'?{self.strip_name(destination)}' for destination in destinations]
+        query.append(f'SELECT DISTINCT {" ".join(select_destinations)}')
         query.append("WHERE {")
-        for triple in triplets:
-            query.append("    ?%s %s ?%s ."%(triple[0].lower(), 
-                                                  triple[1], 
-                                                  triple[2].lower()))
-        return query
-    
-    def _formulate_filter_expression(self, triplets, value):                       
-        value, datatype = self._check_value(value)      
-        last_val = self.g.nodes[triplets[-1][-1]]
-        last_val_name = triplets[-1][-1].lower()
-        
-        #if it is nodetype data
-        if last_val['node_type'] == "data":
-            if datatype == "multi_string":
-                qstr = self._formulate_or_string_query(last_val, 
-                                                   last_val_name, 
-                                                   value)
-            elif datatype == "multi_number":
-                qstr = self._formulate_range_number_query(last_val, 
-                                                   last_val_name, 
-                                                   value)
-            else:
-                qstr = self._formulate_equal_query(last_val, 
-                                                   last_val_name, 
-                                                   value)
-            return qstr
-        else:
-            raise NotImplementedError("Non-data queries are not implemented")
-    
-    def _check_value(self, value):
-        if isinstance(value, list):
-            if not len(value) == 2:
-                raise ValueError("value can be maximum length 2")
-        else:
-            value = [value]
-        if all(isinstance(x, str) for x in value):
-            datatype = "string"
-        elif all(isinstance(x, (int, float)) for x in value):
-            datatype = "number"
-        else:
-            raise TypeError("Values have to be of same type")
-        if len(value) == 1:
-            datatype = f'single_{datatype}'
-        else:
-            datatype = f'multi_{datatype}'
-        return value, datatype
-    
-    
-    def _formulate_equal_query(self, last_val, last_val_name, value):
-        qstr = "FILTER (?%s=\"%s\"^^xsd:%s)"%(last_val_name, 
-                                              str(value[0]), 
-                                              last_val['dtype'])
-        return qstr
-    
-    def _formulate_or_string_query(self, last_val, last_val_name, value):
-        qstr = "FILTER (?%s=\"%s\"^^xsd:%s || ?%s=\"%s\"^^xsd:%s)"%(last_val_name, 
-                                                                    str(value[0]), 
-                                                                    last_val['dtype'],
-                                                                    last_val_name, 
-                                                                    str(value[1]), 
-                                                                    last_val['dtype'],)
-        return qstr
-    
-    def _formulate_range_number_query(self, last_val, last_val_name, value):
-        value = np.sort(value)
-        qstr = "FILTER (?%s >= \"%s\"^^xsd:%s && ?%s <= \"%s\"^^xsd:%s)"%(last_val_name, 
-                                                                    str(value[0]), 
-                                                                    last_val['dtype'],
-                                                                    last_val_name, 
-                                                                    str(value[1]), 
-                                                                    last_val['dtype'],)
-        return qstr
+        
+        #now add corresponding triples
+        for destination in destinations:
+            for triple in all_triplets[destination]:
+                query.append("    ?%s %s ?%s ."%(self.strip_name(triple[0]), 
+                                                 triple[1], 
+                                                 self.strip_name(triple[2])))
+        
+        #now we have to add filters
+        #filters are only needed if it is a dataproperty
+        filter_text = ''
+        
+        if values is not None:
+            lit_nodes = [node for node in self.g.nodes if 'node_type' in self.g.nodes[node].keys() and self.g.nodes[node]['node_type'] == 'literal']
+            data_destinations = [destination for destination in destinations if destination in lit_nodes]
+            if not len(data_destinations) == len(values):
+                raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}')
+            if len(data_destinations) > 0:
+                filter_text = validate_values(data_destinations, values)
+        query.append(filter_text)
+        query.append('}')
+        return '\n'.join(query)
 
         
             

From 5afa65e62e3d219e46aee3f6704308eba907560f Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Mon, 17 Jul 2023 23:38:06 +0200
Subject: [PATCH 21/24] bug fix in query

---
 pyscal_rdf/network/network.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index 135c889..d7e3f31 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -189,7 +189,7 @@ def validate_values(self, destinations, values):
         
         sparql_phrase_list = []
         for phrase, destination in zip(phrase_list, destinations):
-            sparql_phrase = phrase_to_sparql(phrase)
+            sparql_phrase = self.phrase_to_sparql(phrase)
             sparql_phrase = sparql_phrase.replace('value', self.strip_name(destination))
             sparql_phrase = sparql_phrase.replace('datatype', self.g.nodes[destination]['data_type'])
             sparql_phrase_list.append(sparql_phrase)
@@ -245,7 +245,7 @@ def create_query(self, source, destinations, values = None):
             if not len(data_destinations) == len(values):
                 raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}')
             if len(data_destinations) > 0:
-                filter_text = validate_values(data_destinations, values)
+                filter_text = self.validate_values(data_destinations, values)
         query.append(filter_text)
         query.append('}')
         return '\n'.join(query)

From 77eb6066db21243415db20f6f838f5d956b9aeab Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Tue, 18 Jul 2023 14:06:38 +0200
Subject: [PATCH 22/24] add variable condition specification

---
 pyscal_rdf/network/network.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index d7e3f31..ad0c8ec 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -3,6 +3,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import os
+import warnings
 from pyscal_rdf.network.parser import OntoParser
 
 owlfile = os.path.join(os.path.dirname(__file__), "../data/cmso.owl")
@@ -243,9 +244,11 @@ def create_query(self, source, destinations, values = None):
             lit_nodes = [node for node in self.g.nodes if 'node_type' in self.g.nodes[node].keys() and self.g.nodes[node]['node_type'] == 'literal']
             data_destinations = [destination for destination in destinations if destination in lit_nodes]
             if not len(data_destinations) == len(values):
-                raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}')
-            if len(data_destinations) > 0:
-                filter_text = self.validate_values(data_destinations, values)
+                warnings.warn(f'Length of destinations and values are not same, found {len(data_destinations)} and {len(values)}')
+                considered = " ".join(data_destinations[:len(values)])
+                warnings.warn(f'Conditions are considered for {considered}')
+            if len(values) > 0:
+                filter_text = self.validate_values(data_destinations[:len(values)], values)
         query.append(filter_text)
         query.append('}')
         return '\n'.join(query)

From 02925e3f5068826264b23617ffbb151c54bae77c Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Tue, 18 Jul 2023 14:55:55 +0200
Subject: [PATCH 23/24] fix namespaces in ontology

---
 pyscal_rdf/graph.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py
index 3401f35..409f77e 100644
--- a/pyscal_rdf/graph.py
+++ b/pyscal_rdf/graph.py
@@ -21,9 +21,9 @@
 #from pyscal3.core import System
 from pyscal3.atoms import Atoms
 
-CMSO = Namespace("https://purls.helmholtz-metadaten.de/cmso/")
-PLDO = Namespace("https://purls.helmholtz-metadaten.de/pldo/")
-PODO = Namespace("https://purls.helmholtz-metadaten.de/podo/")
+CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/")
+PLDO = Namespace("http://purls.helmholtz-metadaten.de/pldo/")
+PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/")
 
 defstyledict = {
     "BNode": {"color": "#ffe6ff", 

From 12bf23ed598466cc4526ffbc5354956126b79249 Mon Sep 17 00:00:00 2001
From: Sarath Menon <sarathmenon@mailbox.org>
Date: Wed, 13 Sep 2023 13:19:33 +0200
Subject: [PATCH 24/24] change graph code

---
 notebooks/example.ipynb          | 2 +-
 notebooks/memory_profiling.ipynb | 2 +-
 pyscal_rdf/graph.py              | 6 +++++-
 pyscal_rdf/network/network.py    | 4 ++--
 pyscal_rdf/network/parser.py     | 8 ++++++--
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb
index dbf5756..e5ca59a 100644
--- a/notebooks/example.ipynb
+++ b/notebooks/example.ipynb
@@ -746,7 +746,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/memory_profiling.ipynb b/notebooks/memory_profiling.ipynb
index 40a06e0..979eb4f 100644
--- a/notebooks/memory_profiling.ipynb
+++ b/notebooks/memory_profiling.ipynb
@@ -445,7 +445,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py
index 409f77e..8b324fa 100644
--- a/pyscal_rdf/graph.py
+++ b/pyscal_rdf/graph.py
@@ -882,7 +882,11 @@ def query(self, inquery):
         """
         res = self.graph.query(inquery)
         if res is not None:
-            return pd.DataFrame(res)
+            select = next((ln for ln in inquery.split('\n') if 'SELECT DISTINCT' in ln), None)
+            if select is None:  # no SELECT DISTINCT header: keep default column names
+                return pd.DataFrame(res)
+            labels = [x[1:] for x in select.split()[2:]]  # drop the leading '?' of each variable
+            return pd.DataFrame(res, columns=labels)
         raise ValueError("SPARQL query returned None")
 
 
diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py
index ad0c8ec..0ff5faf 100644
--- a/pyscal_rdf/network/network.py
+++ b/pyscal_rdf/network/network.py
@@ -15,12 +15,12 @@ class OntologyNetwork:
     """
     Network representation of Onto
     """
-    def __init__(self, infile=None):
+    def __init__(self, infile=None, delimiter='/'):
         if infile is None:
             infile = owlfile
             
         self.g = nx.DiGraph()
-        self.onto = OntoParser(infile)
+        self.onto = OntoParser(infile, delimiter=delimiter)
         self.data_prefix = 'value'
         self._parse_all()
         
diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py
index 2af5442..af11012 100644
--- a/pyscal_rdf/network/parser.py
+++ b/pyscal_rdf/network/parser.py
@@ -10,6 +10,8 @@ class OntoParser:
     def __init__(self, infile, delimiter='/'):
         if os.path.exists(infile):
             self.tree = get_ontology(f'file://{infile}').load()
+        elif infile[:4] == 'http':
+            self.tree = get_ontology(infile).load()  # fetch remote ontology; unloaded it would be empty
         else:
             raise FileNotFoundError(f'file {infile} not found!')
         self.attributes = {}
@@ -131,10 +133,12 @@ def _parse_object_property(self):
             self.attributes['object_property'][term.name] = term
             for d in dm:
                 if d!='07:owl#Thing':
-                    self.attributes['class'][d].is_range_of.append(term.name)
+                    if d in self.attributes['class']:
+                        self.attributes['class'][d].is_range_of.append(term.name)
             for r in rn:
                 if r!='07:owl#Thing':
-                    self.attributes['class'][d].is_domain_of.append(term.name)
+                    if r in self.attributes['class']:  # skip ranges that were not parsed as classes
+                        self.attributes['class'][r].is_domain_of.append(term.name)
                 
     def _parse_class_basic(self):
         classes = []