From 53f70a96b20eb6198375cda17de7cff7b4ae426c Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Thu, 1 Feb 2024 18:03:51 +0100 Subject: [PATCH 01/11] bug fixes in triples --- pyscal_rdf/graph.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index c359783..0700764 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -291,7 +291,7 @@ def add_chemical_composition(self): element = URIRef(element_indetifiers[e]) self.add((chemical_species, CMSO.hasElement, element)) self.add((element, RDF.type, CMSO.Element)) - self.add((element, CMSO.hasSymbol, Literal(e, datatype=XSD.string))) + self.add((element, CMSO.hasChemicalSymbol, Literal(e, datatype=XSD.string))) self.add((element, CMSO.hasElementRatio, Literal(r, datatype=XSD.float))) def add_simulation_cell(self): @@ -404,10 +404,12 @@ def add_space_group(self): Returns ------- """ - self.add((self.crystal_structure, CMSO.hasSpaceGroupSymbol, + space_group = URIRef(f'{self._name}_SpaceGroup') + self.add((self.crystal_structure, CMSO.hasSpaceGroup, space_group)) + self.add((space_group, CMSO.hasSpaceGroupSymbol, Literal(self.system.schema.material.crystal_structure.spacegroup_symbol(), datatype=XSD.string))) - self.add((self.crystal_structure, CMSO.hasSpaceGroupNumber, + self.add((space_group, CMSO.hasSpaceGroupNumber, Literal(self.system.schema.material.crystal_structure.spacegroup_number(), datatype=XSD.integer))) From d7839abbb2666d3a43898b60437e67e9257264a0 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 11:24:42 +0100 Subject: [PATCH 02/11] patch symbol and add new ontology --- notebooks/create_onto.ipynb | 2 +- notebooks/wrap_creation.ipynb | 2 +- pyscal_rdf/data/cmso.owl | 2 +- pyscal_rdf/network/network.py | 1 + pyscal_rdf/network/parser.py | 8 ++++++++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/notebooks/create_onto.ipynb b/notebooks/create_onto.ipynb index dfab300..103ef3b 100644 --- a/notebooks/create_onto.ipynb +++ b/notebooks/create_onto.ipynb @@ -112,7 +112,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/notebooks/wrap_creation.ipynb b/notebooks/wrap_creation.ipynb index 921bca2..baed7e5 100644 --- a/notebooks/wrap_creation.ipynb +++ b/notebooks/wrap_creation.ipynb @@ -125,7 +125,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/pyscal_rdf/data/cmso.owl b/pyscal_rdf/data/cmso.owl index 3c471ae..3a39723 100644 --- a/pyscal_rdf/data/cmso.owl +++ b/pyscal_rdf/data/cmso.owl @@ -781,7 +781,7 @@ Amorphous Material Amorphous Solid Non-crystalline Solid - An amorphous material or solid is a material which has no defined long-range periodicity. + An amorphous material or solid is a material which has no defined long-range order. diff --git a/pyscal_rdf/network/network.py b/pyscal_rdf/network/network.py index 3c3ddcd..4da4e33 100644 --- a/pyscal_rdf/network/network.py +++ b/pyscal_rdf/network/network.py @@ -235,6 +235,7 @@ def create_query(self, source, destinations, condition=None, enforce_types=True) #now add corresponding triples for destination in destination_names: for triple in all_triplets[destination]: + print(triple) query.append(" ?%s %s ?%s ."%(self.strip_name(triple[0]), triple[1], self.strip_name(triple[2]))) diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index e292c52..3a067f7 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -98,6 +98,14 @@ def _parse_data_property(self): rn = [r.__name__ for r in rn[0].Classes if r is not None] except: rn = [r.__name__ for r in rn if r is not None] + + #PATCH for symbol; could be removed in later versions + print(iri, type(rn)) + if len(rn) == 0: + if os.path.basename(iri) == "hasSymbol": + rn = ['str'] + + print(iri, rn) term = OntoTerm(iri, delimiter=self.delimiter) term.domain = dm term.range = rn From f45f5abe562c42b51bd187ec75ec23fb0aaaa674 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 11:47:25 +0100 Subject: [PATCH 03/11] fix range in cmso.owl --- pyscal_rdf/data/cmso.owl | 1 + pyscal_rdf/network/parser.py | 22 +++++++++++++++++----- pyscal_rdf/network/term.py | 1 + 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pyscal_rdf/data/cmso.owl b/pyscal_rdf/data/cmso.owl index 3a39723..ce6941d 100644 --- a/pyscal_rdf/data/cmso.owl +++ b/pyscal_rdf/data/cmso.owl @@ -723,6 +723,7 @@ + has symbol A data property linking an entity with its symbol. diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index 3a067f7..3143d84 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -99,13 +99,21 @@ def _parse_data_property(self): except: rn = [r.__name__ for r in rn if r is not None] + subprops = self.tree.search(subproperty_of=getattr(self.tree, c.name)) + print('------------------') + print(f'Subprops of {iri}') + for subprop in subprops: + if subprop.iri != iri: + print(subprop.iri) + print('------------------') + #PATCH for symbol; could be removed in later versions - print(iri, type(rn)) - if len(rn) == 0: - if os.path.basename(iri) == "hasSymbol": - rn = ['str'] + #print(iri, type(rn)) + #if len(rn) == 0: + # if os.path.basename(iri) == "hasSymbol": + # rn = ['str'] - print(iri, rn) + #print(iri, rn) term = OntoTerm(iri, delimiter=self.delimiter) term.domain = dm term.range = rn @@ -115,6 +123,10 @@ def _parse_data_property(self): for d in dm: if d!='07:owl#Thing': self.attributes['class'][d].is_range_of.append(term.name) + + + #subproperties should be treated the same + def _parse_object_property(self): for c in self.tree.object_properties(): diff --git a/pyscal_rdf/network/term.py b/pyscal_rdf/network/term.py index 06df913..8186658 100644 --- a/pyscal_rdf/network/term.py +++ b/pyscal_rdf/network/term.py @@ -70,6 +70,7 @@ def __init__(self, uri, #identifier self.node_id = node_id self.subclasses = [] + self.subproperties = [] self.delimiter = delimiter self.is_domain_of = [] self.is_range_of = [] From 7ddd3a1d23024f917d514434402c542296665ee0 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 12:33:06 +0100 Subject: [PATCH 04/11] add a general patch method --- pyscal_rdf/data/cmso.owl | 1 - pyscal_rdf/network/parser.py | 32 +++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pyscal_rdf/data/cmso.owl b/pyscal_rdf/data/cmso.owl index ce6941d..76291e2 100644 --- a/pyscal_rdf/data/cmso.owl +++ b/pyscal_rdf/data/cmso.owl @@ -712,7 +712,6 @@ - has space group symbol A data property linking a crystal structure with its space group symbol. diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index 3143d84..457be70 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -1,4 +1,6 @@ from pyscal_rdf.network.term import OntoTerm, strip_name +from pyscal_rdf.network.patch import patch_terms + from owlready2 import get_ontology import owlready2 @@ -99,21 +101,21 @@ def _parse_data_property(self): except: rn = [r.__name__ for r in rn if r is not None] - subprops = self.tree.search(subproperty_of=getattr(self.tree, c.name)) - print('------------------') - print(f'Subprops of {iri}') - for subprop in subprops: - if subprop.iri != iri: - print(subprop.iri) - print('------------------') - - #PATCH for symbol; could be removed in later versions - #print(iri, type(rn)) - #if len(rn) == 0: - # if os.path.basename(iri) == "hasSymbol": - # rn = ['str'] - - #print(iri, rn) + + #Subproperties + #Commented out for now + #subprops = self.tree.search(subproperty_of=getattr(self.tree, c.name)) + #for subprop in subprops: + # if subprop.iri != iri: + # #print(subprop.iri) + # pass + + #PATCH + #Here: we patch specific items specifically for pyscal rdf + rn = patch_terms(iri, rn) + + print(iri, rn) + print(iri, dm) term = OntoTerm(iri, delimiter=self.delimiter) term.domain = dm term.range = rn From b459a185230125202fdc6339d702ba16df137c5f Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 12:36:26 +0100 Subject: [PATCH 05/11] add patch file --- pyscal_rdf/network/patch.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 pyscal_rdf/network/patch.py diff --git a/pyscal_rdf/network/patch.py b/pyscal_rdf/network/patch.py new file mode 100644 index 0000000..92ac541 --- /dev/null +++ b/pyscal_rdf/network/patch.py @@ -0,0 +1,28 @@ +""" +These are patches specifically designed for pyscal-rdf. + +These may or may not be implemented in the ontology. As it is implemented; it can be removed +from the patches +""" + +import os + +def patch_terms(iri, rn): + """ + Remove functions as patching is done + """ + #Term: hasSymbol + #Ontology: CMSO + #Reason: Range is not specified in the owl file. + #This prevents owlready2 from reading in this property correctly. + if iri == 'http://purls.helmholtz-metadaten.de/cmso/hasSymbol': + rn = ['str'] + #Term: hasValue + #Ontology: CMSO + #Reason: Range is Literal(); however here we use this for number values, hence we can fix this. + #See fn: `add_calculated_property` + elif iri == 'http://purls.helmholtz-metadaten.de/cmso/hasValue': + rn = ['float'] + + return rn + From 40dba36e145cde89d23875b577a7235a870fc678 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 12:51:51 +0100 Subject: [PATCH 06/11] turn off printing in parser --- pyscal_rdf/network/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyscal_rdf/network/parser.py b/pyscal_rdf/network/parser.py index 457be70..1f162ef 100644 --- a/pyscal_rdf/network/parser.py +++ b/pyscal_rdf/network/parser.py @@ -114,8 +114,8 @@ def _parse_data_property(self): #Here: we patch specific items specifically for pyscal rdf rn = patch_terms(iri, rn) - print(iri, rn) - print(iri, dm) + #print(iri, rn) + #print(iri, dm) term = OntoTerm(iri, delimiter=self.delimiter) term.domain = dm term.range = rn From f8c4a7d4a5534a545d100b82724de6eaf992d966 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 12:55:29 +0100 Subject: [PATCH 07/11] add shortcut for onto terms --- pyscal_rdf/graph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 0700764..93fd5c0 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -115,6 +115,7 @@ def __init__(self, graph_file=None, if ontology is None: ontology = read_ontology() self.ontology = ontology + self.terms = self.ontology.terms self._atom_ids = None self.store = store From f5b7381b1182a787b0b9b805616895031a19db01 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 12:59:45 +0100 Subject: [PATCH 08/11] change enforce type to have flexible behaviour --- pyscal_rdf/graph.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/pyscal_rdf/graph.py b/pyscal_rdf/graph.py index 93fd5c0..57b4d35 100644 --- a/pyscal_rdf/graph.py +++ b/pyscal_rdf/graph.py @@ -901,13 +901,27 @@ def query(self, inquery): raise ValueError("SPARQL query returned None") def auto_query(self, source, destination, - condition=None, return_query=False, enforce_types=True): - query = self.ontology.create_query(source, destination, - condition=condition, enforce_types=enforce_types) - if return_query: - return query - return self.query(query) - + condition=None, + return_query=False, + enforce_types=None): + + if enforce_types is None: + for val in [True, False]: + query = self.ontology.create_query(source, destination, + condition=condition, enforce_types=val) + if return_query: + return query + res = self.query(query) + if len(res) != 0: + return res + else: + query = self.ontology.create_query(source, destination, + condition=condition, enforce_types=val) + if return_query: + return query + res = self.query(query) + + return res ################################# # Methods to interact with sample From 2eadf1521b705c37735b4c3f21c5ab781b250286 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 13:08:55 +0100 Subject: [PATCH 09/11] add bridge for symbol --- pyscal_rdf/network/ontology.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyscal_rdf/network/ontology.py b/pyscal_rdf/network/ontology.py index 650e2bf..0831001 100644 --- a/pyscal_rdf/network/ontology.py +++ b/pyscal_rdf/network/ontology.py @@ -32,7 +32,12 @@ def read_ontology(): combo.add_term('http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'object_property', delimiter='#', namespace='rdf') #add paths + + #General fixes combo.add_path(('cmso:CrystalStructure', 'cmso:hasAltName', 'string')) + combo.add_path(('cmso:Element', 'cmso:hasSymbol', 'string')) + + #interontology paths combo.add_path(('cmso:Material', 'cmso:hasDefect', 'pldo:PlanarDefect')) combo.add_path(('cmso:Material', 'cmso:hasDefect', 'podo:Vacancy')) combo.add_path(('cmso:SimulationCell', 'podo:hasVacancyConcentration', 'float')) From 95bb24f7a59246edd378598a32051610273751e7 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 13:15:53 +0100 Subject: [PATCH 10/11] fix patch --- pyscal_rdf/network/ontology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscal_rdf/network/ontology.py b/pyscal_rdf/network/ontology.py index 0831001..c813095 100644 --- a/pyscal_rdf/network/ontology.py +++ b/pyscal_rdf/network/ontology.py @@ -35,7 +35,7 @@ def read_ontology(): #General fixes combo.add_path(('cmso:CrystalStructure', 'cmso:hasAltName', 'string')) - combo.add_path(('cmso:Element', 'cmso:hasSymbol', 'string')) + combo.add_path(('cmso:ChemicalElement', 'cmso:hasSymbol', 'string')) #interontology paths combo.add_path(('cmso:Material', 'cmso:hasDefect', 'pldo:PlanarDefect')) From bc93c3471d4a87c9e6c12264e575976ec2887c31 Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 13 Feb 2024 13:16:07 +0100 Subject: [PATCH 11/11] =?UTF-8?q?Bump=20version:=200.1.5=20=E2=86=92=200.1?= =?UTF-8?q?.6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4982131..91f332e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.5 +current_version = 0.1.6 commit = True tag = False diff --git a/setup.py b/setup.py index 92d1421..127689a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='pyscal_rdf', - version='0.1.5', + version='0.1.6', author='Abril Azocar Guzman, Sarath Menon', author_email='sarath.menon@pyscal.org', description='Ontology based structural manipulation and quering',