From 5ce88f7e6f2444eb8516115ce44e3be8a77054a5 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Fri, 14 Jul 2023 15:29:03 +0200 Subject: [PATCH 01/24] add draft of new parser --- pyscal_rdf/parser.py | 176 +++++++++++++++++++++++++++++++++++++++++++ pyscal_rdf/term.py | 43 +++++++++++ 2 files changed, 219 insertions(+) create mode 100644 pyscal_rdf/parser.py create mode 100644 pyscal_rdf/term.py diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py new file mode 100644 index 0000000..0753c25 --- /dev/null +++ b/pyscal_rdf/parser.py @@ -0,0 +1,176 @@ +from onto_network.term import OntoTerm +from owlready2 import get_ontology + +import os +import copy +import numpy as np +import itertools + +class OntoParser: + def __init__(self, infile, delimiter='/'): + if os.path.exists(infile): + self.tree = get_ontology(f'file://{infile}').load() + else: + raise FileNotFoundError(f'file {infile} not found!') + self.attributes = {} + self.attributes['class'] = {} + self.attributes['object_property'] = {} + self.attributes['data_property'] = {} + self.delimiter = delimiter + self._parse_class() + self._parse_object_property() + self._parse_data_property() + + def _strip_name(self, uri): + uri_split = uri.split(self.delimiter) + if len(uri_split)>1: + return ":".join(uri_split[-2:]) + else: + return uri + + def _strip_datatype(self, uri, delimiter='#'): + uri_split = uri.split(delimiter) + return uri_split[-1] + + def _dict_to_lst(self, d): + return [val for key, val in d.items()] + + def _get_subclasses(self, name): + arg = self._in_which_bin_is_class(name) + if arg is not None: + return self.classes[arg] + else: + return [name] + + def _parse_data_property(self): + for c in self.tree.data_properties(): + iri = c.iri + dm = c.domain + try: + dm = [self._strip_name(d.iri) for d in dm[0].Classes] + except: + dm = [self._strip_name(d.iri) for d in dm] + + #now get subclasses + dm = [self._get_subclasses(d) for d in dm] + dm = list(itertools.chain(*dm)) + + rn = c.range + try: + rn = [r.__name__ for r in rn[0].Classes if r is not None] + except: + rn = [r.__name__ for r in rn if r is not None] + term = OntoTerm(iri) + term.domain = dm + term.range = rn + term.node_type = 'data_property' + self.attributes['data_property'][term.name] = term + #assign this data + for d in dm: + if d!='07:owl#Thing': + self.attributes['class'][d].is_range_of.append(term.name) + + def _parse_object_property(self): + for c in self.tree.object_properties(): + iri = c.iri + dm = c.domain + try: + dm = [self._strip_name(d.iri) for d in dm[0].Classes] + except: + dm = [self._strip_name(d.iri) for d in dm] + + #now get subclasses + dm = [self._get_subclasses(d) for d in dm] + dm = list(itertools.chain(*dm)) + + rn = c.range + try: + rn = [self._strip_name(r.iri) for r in rn[0].Classes] + except: + rn = [self._strip_name(r.iri) for r in rn] + + #now get subclasses + rn = [self._get_subclasses(d) for d in rn] + rn = list(itertools.chain(*rn)) + + term = OntoTerm(iri) + term.domain = dm + term.range = rn + term.node_type = 'object_property' + self.attributes['object_property'][term.name] = term + for d in dm: + if d!='07:owl#Thing': + self.attributes['class'][d].is_range_of.append(term.name) + for r in rn: + if r!='07:owl#Thing': + self.attributes['class'][d].is_domain_of.append(term.name) + + def _parse_class_basic(self): + classes = [] + for c in self.tree.classes(): + iri = c.iri + #print(iri) + try: + subclasses = self.tree.search(subclass_of=getattr(self.tree, c.name)) + for sb in subclasses: + term = OntoTerm(sb.iri) + term.node_type ='class' + self.attributes['class'][term.name] = term + subclasses = [self._strip_name(sb.iri) for sb in subclasses] + classes.append(subclasses) + except: + term = OntoTerm(c.iri) + term.node_type ='class' + self.attributes['class'][term.name] = term + classes.append([self._strip_name(c.iri)]) + return classes + + def _aggregate_keys(self, dd): + lst = copy.deepcopy(dd) + #choose the first list + large_list = [] + start = lst[0] + #delete it from the main list + nruns = len(lst) + del lst[0] + #now loop, if there is intersection add to this list + while True: + found = False + index_to_delete = [] + for count, ls in enumerate(lst): + common = len(list(set(start) & set(ls))) + #print(common) + if common>0: + #common elements found! merge them + for l in ls: + start.append(l) + found = True + index_to_delete.append(count) + if found: + for ii in index_to_delete[::-1]: + del lst[ii] + else: + large_list.append(np.unique(start)) + if len(lst)==0: + break + else: + start = lst[0] + del lst[0] + return large_list + + def _parse_class(self): + sub_classes = self._parse_class_basic() + #now we have to go through and clean up sub classes + sub_classes = self._aggregate_keys(sub_classes) + self.classes = sub_classes + + def _in_which_bin_is_class(self, name): + for count, lst in enumerate(self.classes): + if name in lst: + return count + else: + return None + + + + \ No newline at end of file diff --git a/pyscal_rdf/term.py b/pyscal_rdf/term.py new file mode 100644 index 0000000..53e5515 --- /dev/null +++ b/pyscal_rdf/term.py @@ -0,0 +1,43 @@ + +class OntoTerm: + def __init__(self, uri, node_type=None, + dm=[], rn=[], data_type=None, + node_id=None, delimiter='/'): + """ + This is class that represents an ontology element + """ + self.uri = uri + #name of the class + self._name = None + #type: can be object property, data property, or class + self.node_type = node_type + #now we need domain and range + self.domain = dm + self.range = rn + #datatype, that is only need for data properties + self.data_type = data_type + #identifier + self.node_id = node_id + self.subclasses = [] + self.delimiter = delimiter + self.is_domain_of = [] + self.is_range_of = [] + + @property + def uri(self): + return self._uri + + @uri.setter + def uri(self, val): + self._uri = val + + @property + def name(self): + uri_split = self.uri.split(self.delimiter) + if len(uri_split)>1: + return ":".join(uri_split[-2:]) + else: + return self.uri + + def __repr__(self): + return str(self.name) \ No newline at end of file From 217b32d81ff338365ed3eaa2e20b85a4d89c45f8 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Fri, 14 Jul 2023 15:33:48 +0200 Subject: [PATCH 02/24] update network --- pyscal_rdf/network.py | 144 ++++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 84 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 1afe09d..131f390 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -1,99 +1,75 @@ import networkx as nx +import graphviz import matplotlib.pyplot as plt import numpy as np +import os +from pyscal_rdf.parser import OntoParser -class Network: +owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl") + +def _replace_name(name): + return ".".join(name.split(':')) + +class OntologyNetwork: """ - Network representation of CMSO + Network representation of Onto """ - def __init__(self): + def __init__(self, infile=None): + if infile is None: + infile = owlfile + self.g = nx.DiGraph() - - def add(self, sub, pred, obj, dtype=None, pred_prefix="cmso"): - pred = f'{pred_prefix}:{pred}' - self.g.add_node(sub, node_type="object") - self.g.add_node(pred, node_type="property") - if dtype is not None: - nd = "data" - else: - nd = "object" - self.g.add_node(obj, dtype=dtype, node_type=nd) - self.g.add_edge(sub, pred) - self.g.add_edge(pred, obj) - - def draw(self): - nx.draw(self.g, with_labels=True, font_weight='bold') + self.onto = OntoParser(infile) + self.data_prefix = 'value' + #call methods + self._add_class_nodes() + self._add_object_properties() + self._add_data_properties() + def get_shortest_path(self, source, target): path = nx.shortest_path(self.g, source=source, target=target) return path -class OntologyNetwork(Network): - def __init__(self): - super().__init__() - self.add("Sample", "hasMaterial", "Material") - self.add("Material", "hasElementRatio", "ElementRatio", dtype="string") - - self.add("Sample", "hasSimulationCell", "SimulationCell") - self.add("SimulationCell", "hasVolume", "Volume", dtype="float") - self.add("Sample", "hasNumberOfAtoms", "NumberOfAtoms", dtype="integer") - - self.add("SimulationCell", "hasLength", "SimulationCellLength") - self.add("SimulationCellLength", "hasLength_x", "SimulationCellLength_x", dtype="float") - self.add("SimulationCellLength", "hasLength_y", "SimulationCellLength_y", dtype="float") - self.add("SimulationCellLength", "hasLength_z", "SimulationCellLength_z", dtype="float") - - self.add("SimulationCell", "hasVector", "SimulationCellVectorA") - self.add("SimulationCellVectorA", "hasComponent_x", "SimulationCellVectorA_x", dtype="float") - self.add("SimulationCellVectorA", "hasComponent_y", "SimulationCellVectorA_y", dtype="float") - self.add("SimulationCellVectorA", "hasComponent_z", "SimulationCellVectorA_z", dtype="float") - self.add("SimulationCell", "hasVector", "SimulationCellVectorB") - self.add("SimulationCellVectorB", "hasComponent_x", "SimulationCellVectorB_x", dtype="float") - self.add("SimulationCellVectorB", "hasComponent_y", "SimulationCellVectorB_y", dtype="float") - self.add("SimulationCellVectorB", "hasComponent_z", "SimulationCellVectorB_z", dtype="float") - self.add("SimulationCell", "hasVector", "SimulationCellVectorC") - self.add("SimulationCellVectorC", "hasComponent_x", "SimulationCellVectorC_x", dtype="float") - self.add("SimulationCellVectorC", "hasComponent_y", "SimulationCellVectorC_y", dtype="float") - self.add("SimulationCellVectorC", "hasComponent_z", "SimulationCellVectorC_z", dtype="float") - - self.add("SimulationCell", "hasAngle", "SimulationCellAngle") - self.add("SimulationCellAngle", "hasAngle_alpha", "SimulationCellAngle_alpha", dtype="float") - self.add("SimulationCellAngle", "hasAngle_beta", "SimulationCellAngle_beta", dtype="float") - self.add("SimulationCellAngle", "hasAngle_gamma", "SimulationCellAngle_gamma", dtype="float") - - self.add("Material", "hasStructure", "CrystalStructure") - self.add("CrystalStructure", "hasAltName", "CrystalStructureAltName", dtype="string") - self.add("CrystalStructure", "hasSpaceGroupSymbol", "SpaceGroupSymbol", dtype="string") - self.add("CrystalStructure", "hasSpaceGroupNumber", "SpaceGroupNumber", dtype="integer") - - self.add("CrystalStructure", "hasUnitCell", "UnitCell") - self.add("UnitCell", "hasBravaisLattice", "LatticeSystem") - self.add("UnitCell", "hasLatticeParameter", "LatticeParameter") - self.add("LatticeParameter", "hasLength_x", "LatticeParameter_x", dtype="float") - self.add("LatticeParameter", "hasLength_y", "LatticeParameter_y", dtype="float") - self.add("LatticeParameter", "hasLength_z", "LatticeParameter_z", dtype="float") - self.add("UnitCell", "hasAngle", "LatticeAngle") - self.add("LatticeAngle", "hasAngle_alpha", "LatticeAngle_alpha", dtype="float") - self.add("LatticeAngle", "hasAngle_beta", "LatticeAngle_beta", dtype="float") - self.add("LatticeAngle", "hasAngle_gamma", "LatticeAngle_gamma", dtype="float") - - #add GB properties - self.add("Material", "hasDefect", "Defect", pred_prefix="cmso") - self.add("Defect", "type", "GrainBoundary", pred_prefix="rdf") - self.add("Defect", "type", "TwistBoundary", pred_prefix="rdf") - self.add("Defect", "type", "TiltBoundary", pred_prefix="rdf") - self.add("Defect", "type", "SymmetricTiltBoundary", pred_prefix="rdf") - self.add("Defect", "type", "MixedBoundary", pred_prefix="rdf") - self.add("Defect", "hasSigmaValue", "Sigma", dtype="integer", pred_prefix="pldo") - self.add("Defect", "hasGBPlane", "GBPlane", pred_prefix="pldo", dtype="string") - self.add("Defect", "hasRotationAxis", "RotationAxis", pred_prefix="pldo", dtype="string") - self.add("Defect", "hasMisorientationAngle", "MisorientationAngle", pred_prefix="pldo", dtype="float") - - #add vacancy - self.add("Defect", "type", "Vacancy", pred_prefix="rdf") - self.add("SimulationCell", "hasVacancyConcentration", "VacancyConcentration", pred_prefix="podo", dtype="float") - self.add("SimulationCell", "hasNumberOfVacancies", "NumberOfVacancy", pred_prefix="podo", dtype="integer") - + def _add_class_nodes(self): + for key, val in self.onto.attributes['class'].items(): + self.g.add_node(val.name, node_type='class') + + def _add_object_properties(self): + for key, val in self.onto.attributes['object_property'].items(): + self.g.add_node(val.name, node_type='object_property') + #find domain + for d in val.domain: + self.g.add_edge(d, val.name) + for r in val.range: + self.g.add_edge(val.name, r) + + def _add_data_properties(self): + for key, val in self.onto.attributes['data_property'].items(): + self.g.add_node(val.name, node_type='data_property') + for d in val.domain: + self.g.add_edge(d, val.name) + for r in val.range: + data_node = f'{val.name}{self.data_prefix}' + self.g.add_node(data_node, node_type='literal') + self.g.add_edge(val.name, data_node) + + + def draw(self, styledict = {"class": {"shape":"box"}, + "object_property": {"shape":"ellipse"}, + "data_property": {"shape":"ellipse"}, + "literal": {"shape":"parallelogram"},}): + dot = graphviz.Digraph() + node_list = list(self.g.nodes(data='node_type')) + edge_list = list(self.g.edges) + for node in node_list: + name = _replace_name(node[0]) + if node[1] is not None: + t = node[1] + dot.node(name, shape=styledict[t]['shape'], fontsize="6") + for edge in edge_list: + dot.edge(_replace_name(edge[0]), _replace_name(edge[1])) + return dot def get_path_from_sample(self, target): path = self.get_shortest_path(source="Sample", target=target) From 36995f4982938be66ae42140dfa793499f634fd9 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 12:58:09 +0200 Subject: [PATCH 03/24] add add method for parser --- pyscal_rdf/network.py | 3 +++ pyscal_rdf/parser.py | 26 ++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 131f390..9f754d8 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -26,6 +26,9 @@ def __init__(self, infile=None): self._add_class_nodes() self._add_object_properties() self._add_data_properties() + + def __add__(self, ontonetwork): + pass def get_shortest_path(self, source, target): path = nx.shortest_path(self.g, source=source, target=target) diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py index 0753c25..3e777ff 100644 --- a/pyscal_rdf/parser.py +++ b/pyscal_rdf/parser.py @@ -17,10 +17,32 @@ def __init__(self, infile, delimiter='/'): self.attributes['object_property'] = {} self.attributes['data_property'] = {} self.delimiter = delimiter + self.classes = None self._parse_class() self._parse_object_property() - self._parse_data_property() - + self._parse_data_property() + + def __add__(self, ontoparser): + """ + Add method; in principle it should add- + - classes + - attributes dict + """ + for mainkey in ['class', 'object_property', 'data_property']: + if mainkey in ontoparser.attributes.keys(): + for key, val in ontoparser.attributes[mainkey].items(): + self.attributes[mainkey][key] = val + + #now change classes + if ontoparser.classes is not None: + for clx in ontoparser.classes: + self.classes.append(clx) + + return self + + def __radd__(self, ontoparser): + return self.__add__(ontoparser) + def _strip_name(self, uri): uri_split = uri.split(self.delimiter) if len(uri_split)>1: From 3c9018e2591b5209199493db208112503a571954 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:07:14 +0200 Subject: [PATCH 04/24] add add method for ontonetwork --- pyscal_rdf/network.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 9f754d8..7a2c842 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -22,14 +22,21 @@ def __init__(self, infile=None): self.onto = OntoParser(infile) self.data_prefix = 'value' + def _parse_all(self): #call methods self._add_class_nodes() self._add_object_properties() self._add_data_properties() def __add__(self, ontonetwork): - pass - + #add onto network + self.onto = self.onto + ontonetwork.onto + #now parse again + self._parse_all() + + def __radd__(self, ontonetwork): + return self.__add__(ontonetwork) + def get_shortest_path(self, source, target): path = nx.shortest_path(self.g, source=source, target=target) return path From 51ecb3b5fad9d6fac259bff2466153c688e69ed6 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:13:17 +0200 Subject: [PATCH 05/24] fix bug in add --- pyscal_rdf/network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 7a2c842..2c02bcf 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -33,6 +33,7 @@ def __add__(self, ontonetwork): self.onto = self.onto + ontonetwork.onto #now parse again self._parse_all() + return self def __radd__(self, ontonetwork): return self.__add__(ontonetwork) From 8421051584601cf12e23dc547b3cab970a387383 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:14:07 +0200 Subject: [PATCH 06/24] add overload of attributes --- pyscal_rdf/network.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 2c02bcf..bc361b4 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -35,6 +35,9 @@ def __add__(self, ontonetwork): self._parse_all() return self + def attributes(self): + return self.onto.attributes + def __radd__(self, ontonetwork): return self.__add__(ontonetwork) From 1cf4c99bcba367a889a246b83bd4829b2fa7a700 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:26:18 +0200 Subject: [PATCH 07/24] add possibility to add path --- pyscal_rdf/network.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index bc361b4..c739062 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -69,6 +69,29 @@ def _add_data_properties(self): self.g.add_edge(val.name, data_node) + def add_path(self, triple): + """ + Add a triple as path. Note that all attributes of the triple should already + exist in the graph. The ontology itself is not modified. Only the graph + representation of it is. + The expected use is to bridge between two(or more) different ontologies. + Therefore, mapping can only be between classes. + """ + sub = triple[0] + pred = triple[1] + obj = triple[2] + + for item in [sub, obj]: + if not item in self.attributes['class'].keys(): + raise ValueError(f'{item} not found in self.attributes') + + if pred not in self.attributesp['object_property'].keys(): + raise ValueError(f'{pred} not found in self.attributes') + + #now add + self.g.add_edge(sub, pred) + self.g.add_edge(pred, obj) + def draw(self, styledict = {"class": {"shape":"box"}, "object_property": {"shape":"ellipse"}, "data_property": {"shape":"ellipse"}, From c69229bf6d1ad283182b1bed61092708a60b9207 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:37:45 +0200 Subject: [PATCH 08/24] map subclasses in add_path --- pyscal_rdf/network.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index c739062..26abd19 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -89,8 +89,13 @@ def add_path(self, triple): raise ValueError(f'{pred} not found in self.attributes') #now add - self.g.add_edge(sub, pred) - self.g.add_edge(pred, obj) + subclasses = self.onto._get_subclasses(sub) + for subclass in subclasses: + self.g.add_edge(subclass, pred) + + subclasses = self.onto._get_subclasses(obj) + for subclass in subclasses: + self.g.add_edge(pred, subclass) def draw(self, styledict = {"class": {"shape":"box"}, "object_property": {"shape":"ellipse"}, From 3ce4928c8d10a74a333fd2aaeba2c1b21434b22b Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 13:57:22 +0200 Subject: [PATCH 09/24] add possibilities to add data nodes --- pyscal_rdf/network.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 26abd19..ca83e71 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -81,21 +81,30 @@ def add_path(self, triple): pred = triple[1] obj = triple[2] - for item in [sub, obj]: - if not item in self.attributes['class'].keys(): - raise ValueError(f'{item} not found in self.attributes') - - if pred not in self.attributesp['object_property'].keys(): - raise ValueError(f'{pred} not found in self.attributes') + if sub not in self.onto.attributes['class'].keys(): + raise ValueError(f'{sub} not found in self.attributes') #now add subclasses = self.onto._get_subclasses(sub) for subclass in subclasses: - self.g.add_edge(subclass, pred) + self.g.add_edge(subclass, pred) + + #now add pred + if pred in self.onto.attributes['object_property'].keys(): + if obj not in self.onto.attributes['class'].keys(): + raise ValueError(f'{obj} not found in self.attributes') + subclasses = self.onto._get_subclasses(obj) + for subclass in subclasses: + self.g.add_edge(pred, subclass) - subclasses = self.onto._get_subclasses(obj) - for subclass in subclasses: - self.g.add_edge(pred, subclass) + #another possibility it is data property + elif pred in self.onto.attributes['data_property'].keys(): + data_node = f'{pred}{self.data_prefix}' + self.g.add_node(data_node, node_type='literal') + self.g.add_edge(pred, data_node) + + else: + raise ValueError(f'{pred} not found in self.attributes') def draw(self, styledict = {"class": {"shape":"box"}, "object_property": {"shape":"ellipse"}, From 044aa7343051092462366fd722c0b564fcca2256 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 14:46:47 +0200 Subject: [PATCH 10/24] bug fi in draw --- pyscal_rdf/network.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index ca83e71..e260169 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -21,6 +21,7 @@ def __init__(self, infile=None): self.g = nx.DiGraph() self.onto = OntoParser(infile) self.data_prefix = 'value' + self._parse_all() def _parse_all(self): #call methods @@ -35,6 +36,7 @@ def __add__(self, ontonetwork): self._parse_all() return self + @property def attributes(self): return self.onto.attributes From cd593afb9881b70e9b624ba8954867ae39f76ebd Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 14:53:50 +0200 Subject: [PATCH 11/24] add datatype --- pyscal_rdf/network.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index e260169..cbe46bf 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -67,8 +67,9 @@ def _add_data_properties(self): self.g.add_edge(d, val.name) for r in val.range: data_node = f'{val.name}{self.data_prefix}' - self.g.add_node(data_node, node_type='literal') + self.g.add_node(data_node, node_type='literal', data_type=r.name) self.g.add_edge(val.name, data_node) + def add_path(self, triple): From 0b3b2133e2e939c726001f802d343aa89cbac3c7 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 14:58:48 +0200 Subject: [PATCH 12/24] add possibility to add datatype --- pyscal_rdf/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index cbe46bf..4f8977a 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -103,7 +103,7 @@ def add_path(self, triple): #another possibility it is data property elif pred in self.onto.attributes['data_property'].keys(): data_node = f'{pred}{self.data_prefix}' - self.g.add_node(data_node, node_type='literal') + self.g.add_node(data_node, node_type='literal', data_type=obj) self.g.add_edge(pred, data_node) else: From 222bd00c42d9e2dda9ab750011fe5bd9bcbc5b73 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 14:59:49 +0200 Subject: [PATCH 13/24] bug fix in datatype --- pyscal_rdf/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 4f8977a..80b2030 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -67,7 +67,7 @@ def _add_data_properties(self): self.g.add_edge(d, val.name) for r in val.range: data_node = f'{val.name}{self.data_prefix}' - self.g.add_node(data_node, node_type='literal', data_type=r.name) + self.g.add_node(data_node, node_type='literal', data_type=r) self.g.add_edge(val.name, data_node) From 064c25409d943037bdc5893cfb1870a196d08417 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 15:34:05 +0200 Subject: [PATCH 14/24] add namespace in the parser --- pyscal_rdf/parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py index 3e777ff..d360052 100644 --- a/pyscal_rdf/parser.py +++ b/pyscal_rdf/parser.py @@ -18,6 +18,7 @@ def __init__(self, infile, delimiter='/'): self.attributes['data_property'] = {} self.delimiter = delimiter self.classes = None + self.namespaces = [self.tree.base_iri] self._parse_class() self._parse_object_property() self._parse_data_property() @@ -38,6 +39,9 @@ def __add__(self, ontoparser): for clx in ontoparser.classes: self.classes.append(clx) + for ns in ontoparser.namespaces: + self.namespaces.append(ns) + return self def __radd__(self, ontoparser): From c81ef2a1aa0d84bd9f44035b7d721ae39e7afd1d Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 15:39:52 +0200 Subject: [PATCH 15/24] convert namespace to dict --- pyscal_rdf/network.py | 8 ++++++++ pyscal_rdf/parser.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network.py index 80b2030..f9e1a78 100644 --- a/pyscal_rdf/network.py +++ b/pyscal_rdf/network.py @@ -40,6 +40,14 @@ def __add__(self, ontonetwork): def attributes(self): return self.onto.attributes + @property + def namespaces(self): + return self.onto.namespaces + + @property + def extra_namespaces(self): + return self.onto.extra_namespaces + def __radd__(self, ontonetwork): return self.__add__(ontonetwork) diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/parser.py index d360052..98391c2 100644 --- a/pyscal_rdf/parser.py +++ b/pyscal_rdf/parser.py @@ -18,7 +18,9 @@ def __init__(self, infile, delimiter='/'): self.attributes['data_property'] = {} self.delimiter = delimiter self.classes = None - self.namespaces = [self.tree.base_iri] + self.namespaces = {self.tree.name: self.tree.base_iri} + self.extra_namespaces = {'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', + } self._parse_class() self._parse_object_property() self._parse_data_property() @@ -39,8 +41,11 @@ def __add__(self, ontoparser): for clx in ontoparser.classes: self.classes.append(clx) - for ns in ontoparser.namespaces: - self.namespaces.append(ns) + for key, val in ontoparser.namespaces.items(): + self.namespaces[key] = val + + for key, val in ontoparser.extra_namespaces.items(): + self.extra_namespaces[key] = val return self From 0f764b4a9941f1202d7f728ea4847ad60c99586b Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 15:43:37 +0200 Subject: [PATCH 16/24] restructure network code --- pyscal_rdf/network/__init__.py | 0 pyscal_rdf/{ => network}/network.py | 0 pyscal_rdf/{ => network}/parser.py | 0 pyscal_rdf/{ => network}/term.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pyscal_rdf/network/__init__.py rename pyscal_rdf/{ => network}/network.py (100%) rename pyscal_rdf/{ => network}/parser.py (100%) rename pyscal_rdf/{ => network}/term.py (100%) diff --git a/pyscal_rdf/network/__init__.py b/pyscal_rdf/network/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscal_rdf/network.py b/pyscal_rdf/network/network.py similarity index 100% rename from pyscal_rdf/network.py rename to pyscal_rdf/network/network.py diff --git a/pyscal_rdf/parser.py b/pyscal_rdf/network/parser.py similarity index 100% rename from pyscal_rdf/parser.py rename to pyscal_rdf/network/parser.py diff --git a/pyscal_rdf/term.py b/pyscal_rdf/network/term.py similarity index 100% rename from pyscal_rdf/term.py rename to pyscal_rdf/network/term.py From 1528b4a23e73c95622f2c032842e0c936ed314a6 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 15:45:07 +0200 Subject: [PATCH 17/24] fix imports --- pyscal_rdf/graph.py | 2 +- pyscal_rdf/network/network.py | 2 +- pyscal_rdf/network/parser.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 1d8cb4e..61f054c 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -15,7 +15,7 @@ import pandas as pd from pyscal_rdf.visualize import visualize_graph -from pyscal_rdf.network import OntologyNetwork +from pyscal_rdf.network.network import OntologyNetwork from pyscal_rdf.rdfsystem import System import pyscal_rdf.properties as prp #from pyscal3.core import System diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index f9e1a78..a83b1e3 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import numpy as np import os -from pyscal_rdf.parser import OntoParser +from pyscal_rdf.network.parser import OntoParser owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl") diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index 98391c2..2af5442 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -1,4 +1,4 @@ -from onto_network.term import OntoTerm +from pyscal_rdf.network.term import OntoTerm from owlready2 import get_ontology import os From 7744e9691a00a37fb2c7157e92d3ca1f1e1651fa Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 15:59:02 +0200 Subject: [PATCH 18/24] return triple directly from path --- pyscal_rdf/network/network.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index a83b1e3..9f519f1 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -51,8 +51,13 @@ def extra_namespaces(self): def __radd__(self, ontonetwork): return self.__add__(ontonetwork) - def get_shortest_path(self, source, target): + def get_shortest_path(self, source, target, triples=False): path = nx.shortest_path(self.g, source=source, target=target) + if triples: + triple_list = [] + for x in range(len(path)//2): + triple_list.append(path[2*x:2*x+3]) + return triple_list return path def _add_class_nodes(self): From cf76df4e56848c6c90c5625373bbd48a85c49f35 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 16:26:09 +0200 Subject: [PATCH 19/24] add strip name --- pyscal_rdf/network/network.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index 9f519f1..71fcaa7 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -36,6 +36,12 @@ def __add__(self, ontonetwork): self._parse_all() return self + def strip_name(self, name): + raw = name.split(':') + if len(raw) > 1: + return raw[-1] + return name + @property def attributes(self): return self.onto.attributes From be94d433243ff9ec9ba092a1b7ceea5f9ccb471f Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 23:36:02 +0200 Subject: [PATCH 20/24] add draft of new query --- pyscal_rdf/graph.py | 2 +- pyscal_rdf/network/network.py | 185 ++++++++++++++++++---------------- 2 files changed, 97 insertions(+), 90 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 61f054c..3401f35 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -93,7 +93,7 @@ def __init__(self, graph_file=None, self.material = None self.sysdict = None self.sgraph = None - self._query_graph = OntologyNetwork() + #self._query_graph = OntologyNetwork() self._atom_ids = None def process_structure(self, structure, format=None): diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index 71fcaa7..135c889 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -5,7 +5,7 @@ import os from pyscal_rdf.network.parser import OntoParser -owlfile = os.path.join(os.path.dirname(__file__), "data/cmso.owl") +owlfile = os.path.join(os.path.dirname(__file__), "../data/cmso.owl") def _replace_name(name): return ".".join(name.split(':')) @@ -151,97 +151,104 @@ def get_path_from_sample(self, target): triplets.append(path[2*x:2*x+3]) return triplets - def formulate_query(self, target, value): - #first get triplets - triplets = self.get_path_from_sample(target) - #start building query - query = self._formulate_query_path(triplets) - query.append(self._formulate_filter_expression(triplets, value)) - query.append("}") - query = " ".join(query) - return query + def phrase_to_sparql(self, phrase): + def _extract_operation(phr): + r = phr.split(' ') + if len(r) != 3: + raise ValueError('wrong filters!') + return f'?value{r[1]}\"{r[2]}\"^^xsd:datatype' + + conditions = [] + operation = None - - def _formulate_query_path(self, triplets): + raw = phrase.split(' and ') + + if len(raw) > 1: + operation = '&&' + if operation is None: + raw = phrase.split(' or ') + if len(raw) > 1: + operation = '||' + + if operation is not None: + for ph in raw: + conditions.append(_extract_operation(ph)) + else: + conditions.append(_extract_operation(phrase)) + full_str = f' {operation} '.join(conditions) + #replace values + return full_str + + + def validate_values(self, destinations, values): + combinator_dict = {'and': '&&', 'or': '||'} + combinator_list = values[1::2] + phrase_list = values[::2] + if not len(combinator_list) == len(destinations)-1: + raise ValueError("Invalid combinations!") + + sparql_phrase_list = [] + for phrase, destination in zip(phrase_list, destinations): + sparql_phrase = phrase_to_sparql(phrase) + sparql_phrase = sparql_phrase.replace('value', self.strip_name(destination)) + sparql_phrase = sparql_phrase.replace('datatype', self.g.nodes[destination]['data_type']) + sparql_phrase_list.append(sparql_phrase) + + #combine phrases with phrase list + updated_sparql_phrase_list = [] + for count, sparql_phrase in enumerate(sparql_phrase_list): + updated_sparql_phrase_list.append(f'({sparql_phrase})') + if count < len(sparql_phrase_list)-1: + updated_sparql_phrase_list.append(combinator_dict[combinator_list[count]]) + + full_filter = " ".join(updated_sparql_phrase_list) + return f'FILTER ({full_filter})' + + def create_query(self, source, destinations, values = None): + """ + values is a dict with keys value, operation + """ + if not isinstance(destinations, list): + destinations = [destinations] + + #start prefix of quer query = [] - query.append("PREFIX cmso: ") - query.append("PREFIX pldo: ") - query.append("PREFIX podo: ") - query.append("PREFIX rdf: ") - query.append("SELECT DISTINCT ?sample") + for key, val in self.namespaces.items(): + query.append(f'PREFIX {key}: <{val}>') + for key, val in self.extra_namespaces.items(): + query.append(f'PREFIX {key}: <{val}>') + + #now for each destination, start adding the paths in the query + all_triplets = {} + for destination in destinations: + triplets = self.get_shortest_path(source, destination, triples=True) + all_triplets[destination] = triplets + + select_destinations = [f'?{self.strip_name(destination)}' for destination in destinations] + query.append(f'SELECT DISTINCT {" ".join(select_destinations)}') query.append("WHERE {") - for triple in triplets: - query.append(" ?%s %s ?%s ."%(triple[0].lower(), - triple[1], - triple[2].lower())) - return query - - def _formulate_filter_expression(self, triplets, value): - value, datatype = self._check_value(value) - last_val = self.g.nodes[triplets[-1][-1]] - last_val_name = triplets[-1][-1].lower() - - #if it is nodetype data - if last_val['node_type'] == "data": - if datatype == "multi_string": - qstr = self._formulate_or_string_query(last_val, - last_val_name, - value) - elif datatype == "multi_number": - qstr = self._formulate_range_number_query(last_val, - last_val_name, - value) - else: - qstr = self._formulate_equal_query(last_val, - last_val_name, - value) - return qstr - else: - raise NotImplementedError("Non-data queries are not implemented") - - def _check_value(self, value): - if isinstance(value, list): - if not len(value) == 2: - raise ValueError("value can be maximum length 2") - else: - value = [value] - if all(isinstance(x, str) for x in value): - datatype = "string" - elif all(isinstance(x, (int, float)) for x in value): - datatype = "number" - else: - raise TypeError("Values have to be of same type") - if len(value) == 1: - datatype = f'single_{datatype}' - else: - datatype = f'multi_{datatype}' - return value, datatype - - - def _formulate_equal_query(self, last_val, last_val_name, value): - qstr = "FILTER (?%s=\"%s\"^^xsd:%s)"%(last_val_name, - str(value[0]), - last_val['dtype']) - return qstr - - def _formulate_or_string_query(self, last_val, last_val_name, value): - qstr = "FILTER (?%s=\"%s\"^^xsd:%s || ?%s=\"%s\"^^xsd:%s)"%(last_val_name, - str(value[0]), - last_val['dtype'], - last_val_name, - str(value[1]), - last_val['dtype'],) - return qstr - - def _formulate_range_number_query(self, last_val, last_val_name, value): - value = np.sort(value) - qstr = "FILTER (?%s >= \"%s\"^^xsd:%s && ?%s <= \"%s\"^^xsd:%s)"%(last_val_name, - str(value[0]), - last_val['dtype'], - last_val_name, - str(value[1]), - last_val['dtype'],) - return qstr + + #now add corresponding triples + for destination in destinations: + for triple in all_triplets[destination]: + query.append(" ?%s %s ?%s ."%(self.strip_name(triple[0]), + triple[1], + self.strip_name(triple[2]))) + + #now we have to add filters + #filters are only needed if it is a dataproperty + filter_text = '' + + if values is not None: + lit_nodes = [node for node in self.g.nodes if 'node_type' in self.g.nodes[node].keys() and self.g.nodes[node]['node_type'] == 'literal'] + data_destinations = [destination for destination in destinations if destination in lit_nodes] + if not len(data_destinations) == len(values): + raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}') + if len(data_destinations) > 0: + filter_text = validate_values(data_destinations, values) + query.append(filter_text) + query.append('}') + return '\n'.join(query) From 5afa65e62e3d219e46aee3f6704308eba907560f Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Mon, 17 Jul 2023 23:38:06 +0200 Subject: [PATCH 21/24] bug fix in query --- pyscal_rdf/network/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index 135c889..d7e3f31 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -189,7 +189,7 @@ def validate_values(self, destinations, values): sparql_phrase_list = [] for phrase, destination in zip(phrase_list, destinations): - sparql_phrase = phrase_to_sparql(phrase) + sparql_phrase = self.phrase_to_sparql(phrase) sparql_phrase = sparql_phrase.replace('value', self.strip_name(destination)) sparql_phrase = sparql_phrase.replace('datatype', self.g.nodes[destination]['data_type']) sparql_phrase_list.append(sparql_phrase) @@ -245,7 +245,7 @@ def create_query(self, source, destinations, values = None): if not len(data_destinations) == len(values): raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}') if len(data_destinations) > 0: - filter_text = validate_values(data_destinations, values) + filter_text = self.validate_values(data_destinations, values) query.append(filter_text) query.append('}') return '\n'.join(query) From 77eb6066db21243415db20f6f838f5d956b9aeab Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 18 Jul 2023 14:06:38 +0200 Subject: [PATCH 22/24] add variable condition specification --- pyscal_rdf/network/network.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index d7e3f31..ad0c8ec 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -3,6 +3,7 @@ import matplotlib.pyplot as plt import numpy as np import os +import warnings from pyscal_rdf.network.parser import OntoParser owlfile = os.path.join(os.path.dirname(__file__), "../data/cmso.owl") @@ -243,9 +244,11 @@ def create_query(self, source, destinations, values = None): lit_nodes = [node for node in self.g.nodes if 'node_type' in self.g.nodes[node].keys() and self.g.nodes[node]['node_type'] == 'literal'] data_destinations = [destination for destination in destinations if destination in lit_nodes] if not len(data_destinations) == len(values): - raise ValueError(f'Length of destinations and values should be same, found {len(data_destinations)} and {len(values)}') - if len(data_destinations) > 0: - filter_text = self.validate_values(data_destinations, values) + warnings.warn(f'Length of destinations and values are not same, found {len(data_destinations)} and {len(values)}') + considered = " ".join(data_destinations[:len(values)]) + warnings.warn(f'Conditions are considered for {considered}') + if len(values) > 0: + filter_text = self.validate_values(data_destinations[:len(values)], values) query.append(filter_text) query.append('}') return '\n'.join(query) From 02925e3f5068826264b23617ffbb151c54bae77c Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 18 Jul 2023 14:55:55 +0200 Subject: [PATCH 23/24] fix namespaces in ontology --- pyscal_rdf/graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 3401f35..409f77e 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -21,9 +21,9 @@ #from pyscal3.core import System from pyscal3.atoms import Atoms -CMSO = Namespace("https://purls.helmholtz-metadaten.de/cmso/") -PLDO = Namespace("https://purls.helmholtz-metadaten.de/pldo/") -PODO = Namespace("https://purls.helmholtz-metadaten.de/podo/") +CMSO = Namespace("http://purls.helmholtz-metadaten.de/cmso/") +PLDO = Namespace("http://purls.helmholtz-metadaten.de/pldo/") +PODO = Namespace("http://purls.helmholtz-metadaten.de/podo/") defstyledict = { "BNode": {"color": "#ffe6ff", From 12bf23ed598466cc4526ffbc5354956126b79249 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Wed, 13 Sep 2023 13:19:33 +0200 Subject: [PATCH 24/24] change graph code --- notebooks/example.ipynb | 2 +- notebooks/memory_profiling.ipynb | 2 +- pyscal_rdf/graph.py | 6 +++++- pyscal_rdf/network/network.py | 4 ++-- pyscal_rdf/network/parser.py | 8 ++++++-- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index dbf5756..e5ca59a 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -746,7 +746,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/notebooks/memory_profiling.ipynb b/notebooks/memory_profiling.ipynb index 40a06e0..979eb4f 100644 --- a/notebooks/memory_profiling.ipynb +++ b/notebooks/memory_profiling.ipynb @@ -445,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 409f77e..8b324fa 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -882,7 +882,11 @@ def query(self, inquery): """ res = self.graph.query(inquery) if res is not None: - return pd.DataFrame(res) + for line in inquery.split('\n'): + if 'SELECT DISTINCT' in line: + break + labels = [x[1:] for x in line.split()[2:]] + return pd.DataFrame(res, columns=labels) raise ValueError("SPARQL query returned None") diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index ad0c8ec..0ff5faf 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -15,12 +15,12 @@ class OntologyNetwork: """ Network representation of Onto """ - def __init__(self, infile=None): + def __init__(self, infile=None, delimiter='/'): if infile is None: infile = owlfile self.g = nx.DiGraph() - self.onto = OntoParser(infile) + self.onto = OntoParser(infile, delimiter=delimiter) self.data_prefix = 'value' self._parse_all() diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index 2af5442..af11012 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -10,6 +10,8 @@ class OntoParser: def __init__(self, infile, delimiter='/'): if os.path.exists(infile): self.tree = get_ontology(f'file://{infile}').load() + elif infile[:4] == 'http': + self.tree = get_ontology(infile) else: raise FileNotFoundError(f'file {infile} not found!') self.attributes = {} @@ -131,10 +133,12 @@ def _parse_object_property(self): self.attributes['object_property'][term.name] = term for d in dm: if d!='07:owl#Thing': - self.attributes['class'][d].is_range_of.append(term.name) + if d in self.attributes['class']: + self.attributes['class'][d].is_range_of.append(term.name) for r in rn: if r!='07:owl#Thing': - self.attributes['class'][d].is_domain_of.append(term.name) + if d in self.attributes['class']: + self.attributes['class'][d].is_domain_of.append(term.name) def _parse_class_basic(self): classes = []