Skip to content

Commit

Permalink
Merge branch 'main' into ci
Browse files Browse the repository at this point in the history
  • Loading branch information
tngTUDOR committed Oct 10, 2024
2 parents 330fbd5 + 29b468a commit f67eae9
Show file tree
Hide file tree
Showing 10 changed files with 36,660 additions and 63 deletions.
54 changes: 29 additions & 25 deletions sentier_vocab/add_terms.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from .ordered_serialization import OrderedTurtleSerializer
from pathlib import Path

import skosify
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS
import skosify

from .ordered_serialization import OrderedTurtleSerializer

VAEM = Namespace("http://www.linkedmodel.org/schema/vaem")
QUDTS = Namespace("http://qudt.org/schema/qudt/")
Expand All @@ -11,25 +13,25 @@


# Short predicate names mapped to the rdflib term they stand for.
# Every key is listed exactly once: a repeated key in a dict literal is
# silently overwritten by the later entry, which hides copy/paste mistakes.
COMMON_PREDICATES = {
    # SKOS
    "broader": SKOS.broader,
    "narrower": SKOS.narrower,
    "prefLabel": SKOS.prefLabel,
    "altLabel": SKOS.altLabel,
    "hiddenLabel": SKOS.hiddenLabel,
    "notation": SKOS.notation,
    "definition": SKOS.definition,
    "related": SKOS.related,
    "exactMatch": SKOS.exactMatch,
    "closeMatch": SKOS.closeMatch,
    "inScheme": SKOS.inScheme,
    # RDFS / Dublin Core / RDF
    "isDefinedBy": RDFS.isDefinedBy,
    "isReplacedBy": DCTERMS.isReplacedBy,
    "type": RDF.type,
    # QUDT schema
    "hasQuantityKind": QUDTS.hasQuantityKind,
    "hasDimensionVector": QUDTS.hasDimensionVector,
    "conversionMultiplier": QUDTS.conversionMultiplier,
    "conversionMultiplierSN": QUDTS.conversionMultiplierSN,
}
OBJECT_TYPES_FOR_PREDICATES = {
SKOS.broader: Literal,
Expand All @@ -53,8 +55,8 @@
QUDTS.conversionMultiplierSN: URIRef,
}
# Short object names mapped to the SKOS class they stand for
# (skos:Concept and skos:ConceptScheme).
COMMON_OBJECTS = {
'Concept': SKOS.Concept,
'ConceptScheme': SKOS.ConceptScheme,
"Concept": SKOS.Concept,
"ConceptScheme": SKOS.ConceptScheme,
}


Expand Down Expand Up @@ -124,7 +126,9 @@ def add_custom_terms(data: list[dict], namespace: str, filename: str) -> Path:
raise ValueError(f"Object {o} can be translated into correct form")

if object_type is not None and not isinstance(object_, object_type):
raise ValueError(f"Object {object_} has incorrect type for this function; should be {type(object_type)} but got {type(object_)}")
raise ValueError(
f"Object {object_} has incorrect type for this function; should be {type(object_type)} but got {type(object_)}"
)

graph.add((subject, predicate, object_))

Expand All @@ -134,7 +138,7 @@ def add_custom_terms(data: list[dict], namespace: str, filename: str) -> Path:

output_path = (Path(__file__).parent / "output" / filename).with_suffix(".ttl")
serializer = OrderedTurtleSerializer(graph)
with open(output_path, 'wb') as fp:
with open(output_path, "wb") as fp:
serializer.serialize(fp)

return output_path
2 changes: 1 addition & 1 deletion sentier_vocab/envo.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,4 @@ def write_graph(


if __name__ == "__main__":
ENVO().write_graph(dirpath = Path(__file__).parent / "output")
ENVO().write_graph(dirpath=Path(__file__).parent / "output")
110 changes: 110 additions & 0 deletions sentier_vocab/geonames_iri_terms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import os
import zipfile
from urllib.request import urlretrieve

import polars as pl
import sentier_data_tools as sdt
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, SKOS, XSD
from skosify import infer

"""
The GeoNames source data is too large to commit to git; download it from geonames.org instead.
Data used to build the dataframe for the entire world:
384 MB, unpacks to 1.6 GB: https://download.geonames.org/export/dump/allCountries.zip
Data used to build the hierarchy dataframe:
2 MB, unpacks to 9 MB: https://download.geonames.org/export/dump/hierarchy.zip
Set world_path to the location of the unpacked allCountries.txt, and hierarchy_path to the location of hierarchy.txt.
"""


def generateGeonameVocabulary(
    world_path: str,
    hierarchy_path: str,
    destination: str = "output/geonames-iri.ttl",
) -> None:
    """Build a SKOS vocabulary from GeoNames dumps and serialize it to disk.

    Rows of the world dump whose ``feature_code`` is ``PCLI``, ``ADM1`` or
    ``RGN`` become ``skos:Concept`` resources with a ``skos:prefLabel`` and a
    GeoNames ``countryCode``. Parent/child relations from the hierarchy dump
    are added as ``skos:narrower`` links, but only when both ends are among
    the selected rows.

    Args:
        world_path: Path to the unpacked, tab-separated ``allCountries.txt``
            (https://download.geonames.org/export/dump/allCountries.zip).
        hierarchy_path: Path to the unpacked, tab-separated ``hierarchy.txt``
            (https://download.geonames.org/export/dump/hierarchy.zip).
        destination: File the graph is serialized to. The default is relative
            to the current working directory; missing parent directories are
            created.
    """
    GEOSPACES = "https://sws.geonames.org/"
    GN = Namespace("http://www.geonames.org/ontology#")

    all_schema = pl.Schema(
        {
            "geonameid": pl.Int64,
            "name": pl.String,
            "asciiname": pl.String,
            "alternatenames": pl.String,
            "latitude": pl.Float32,
            "longitude": pl.Float32,
            "feature_class": pl.String,
            "feature_code": pl.String,
            "country_code": pl.String,
            "cc2": pl.String,
            "admin1_code": pl.String,
            "admin2_code": pl.String,
            "admin3_code": pl.String,
            "admin4_code": pl.String,
            "population": pl.Int64,
            "elevation": pl.Int16,
            "dem": pl.Int64,
            "timezone": pl.String,
            "modification_date": pl.Date,
        }
    )
    world_frame = pl.scan_csv(
        source=world_path, has_header=False, separator="\t", schema=all_schema
    )

    # NOTE(review): the third column is named "admin1_code" here, but it is
    # never read below -- confirm the name against the GeoNames readme.
    hierarchy_schema = pl.Schema(
        {"parent": pl.Int64, "child": pl.Int64, "admin1_code": pl.String}
    )
    # The GeoNames dumps carry no header row. Without has_header=False the
    # first relation in the file would be consumed as column names and lost.
    hierarchy = pl.scan_csv(
        hierarchy_path, has_header=False, schema=hierarchy_schema, separator="\t"
    )

    # The SQL below controls which places are modelled; widen or narrow it as
    # needed. See "feature classes" in
    # https://download.geonames.org/export/dump/readme.txt. To isolate only
    # countries, use "where feature_code = 'PCLI'".
    filtered_world = world_frame.sql(
        "select * from self where feature_code in ('PCLI', 'ADM1', 'RGN')"
    ).collect()

    # Read the hierarchy once and index it by parent, keeping only relations
    # whose two ends were both selected. This replaces one disk scan of the
    # hierarchy file plus one dataframe scan per child for every place.
    kept_ids = set(filtered_world["geonameid"].to_list())
    children_of = {}
    for parent_id, child_id in hierarchy.select("parent", "child").collect().iter_rows():
        if parent_id in kept_ids and child_id in kept_ids:
            children_of.setdefault(parent_id, []).append(child_id)

    world = Graph()
    for item in filtered_world.iter_rows():
        # Positional columns follow all_schema: 0 = geonameid, 1 = name,
        # 8 = country_code.
        geoname_id = item[0]
        uri = URIRef(GEOSPACES + str(geoname_id))
        world.add((uri, RDF.type, SKOS.Concept))
        world.add((uri, SKOS.prefLabel, Literal(item[1])))
        world.add((uri, GN.countryCode, Literal(item[8])))
        for child_id in children_of.get(geoname_id, ()):
            world.add((uri, SKOS.narrower, URIRef(GEOSPACES + str(child_id))))

    # Presumably completes the hierarchy with the inverse skos:broader links;
    # see the skosify documentation for the exact inference rules.
    infer.skos_hierarchical(world)

    # rdflib does not create missing directories when serializing.
    out_dir = os.path.dirname(destination)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    world.serialize(destination=destination)
4 changes: 1 addition & 3 deletions sentier_vocab/graph_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def skosify_checks(self):
def get_identifier(self, uri: URIRef) -> str:
return uri.split("/")[-1]

def write_graph(
self, filename: str, dirpath: Path | None = None
) -> Path:
def write_graph(self, filename: str, dirpath: Path | None = None) -> Path:
if not filename.endswith(".ttl"):
filename += ".ttl"
if not dirpath:
Expand Down
52 changes: 43 additions & 9 deletions sentier_vocab/input/custom_products.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from rdflib.namespace import RDFS, SKOS, RDF
from rdflib import URIRef, Namespace, Literal
from rdflib import Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, SKOS

PRODUCTS = Namespace("https://vocab.sentier.dev/products/")

Expand All @@ -8,35 +8,69 @@
("electrolyzer", "broader", "http://data.europa.eu/xsp/cn2024/854330700080"),
("electrolyzer", "prefLabel", "Electrolyzer", "en-US"),
("electrolyzer", "prefLabel", "Electrolyzer", "en-GB"),
("electrolyzer", "definition", "An electrolyzer is a machine that uses electricity to drive a chemical reaction.", "en"),
(
"electrolyzer",
"definition",
"An electrolyzer is a machine that uses electricity to drive a chemical reaction.",
"en",
),
("electrolyzer", "related", "https://en.wikipedia.org/wiki/Electrolysis"),
("aec-electrolyzer", "type", "Concept"),
("aec-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("aec-electrolyzer", "prefLabel", "Alkaline Electrolysis Cell Electrolyzer", "en"),
("aec-electrolyzer", "definition", "An electrolyser with two electrodes operating in a liquid alkaline electrolyte.", "en"),
(
"aec-electrolyzer",
"definition",
"An electrolyser with two electrodes operating in a liquid alkaline electrolyte.",
"en",
),
("aec-electrolyzer", "related", "https://en.wikipedia.org/wiki/Alkaline_water_electrolysis"),
("pem-electrolyzer", "type", "Concept"),
("pem-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyser", "en-GB"),
("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyzer", "en-US"),
("pem-electrolyzer", "definition", "An electrolyser with a solid polymer electrolyte and a proton exchange membrane.", "en"),
("pem-electrolyzer", "related", "https://en.wikipedia.org/wiki/Proton_exchange_membrane_electrolysis"),
(
"pem-electrolyzer",
"definition",
"An electrolyser with a solid polymer electrolyte and a proton exchange membrane.",
"en",
),
(
"pem-electrolyzer",
"related",
"https://en.wikipedia.org/wiki/Proton_exchange_membrane_electrolysis",
),
("soel-electrolyzer", "type", "Concept"),
("soel-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("soel-electrolyzer", "prefLabel", "Solid Oxide Electrolyzer", "en"),
("soel-electrolyzer", "definition", "A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water.", "en"),
(
"soel-electrolyzer",
"definition",
"A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water.",
"en",
),
("soel-electrolyzer", "related", "https://en.wikipedia.org/wiki/Solid_oxide_electrolyzer_cell"),
# Missing from Combined Nomenclature
# tetrafluoroethylene, not poly-
("tetrafluoroethylene", "type", "Concept"),
("tetrafluoroethylene", "broader", "http://data.europa.eu/xsp/cn2024/290349000080"),
("tetrafluoroethylene", "prefLabel", "Tetrafluoroethylene", "en"),
("tetrafluoroethylene", "related", "https://en.wikipedia.org/wiki/Tetrafluoroethylene"),
("tetrafluoroethylene", "definition", "Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)", "en"),
(
"tetrafluoroethylene",
"definition",
"Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)",
"en",
),
# Zeolite
("zeolite", "type", "Concept"),
("zeolite", "broader", "http://data.europa.eu/xsp/cn2024/382400000080"),
("zeolite", "prefLabel", "Zeolite", "en"),
("zeolite", "related", "https://en.wikipedia.org/wiki/Zeolite"),
("zeolite", "definition", "Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)", "en"),
(
"zeolite",
"definition",
"Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)",
"en",
),
]
15 changes: 7 additions & 8 deletions sentier_vocab/open_energy_ontology.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from rdflib import Graph, Namespace, URIRef
from sentier_vocab.utils import get_file_in_downloadable_zip_archive
from sentier_vocab.graph_base import GraphBase
from rdflib.namespace import RDFS, SKOS, RDF
import skosify
from loguru import logger
from rdflib import Graph, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, SKOS

from sentier_vocab.graph_base import GraphBase
from sentier_vocab.utils import get_file_in_downloadable_zip_archive

OEO = Namespace("http://openenergy-platform.org/ontology/oeo/")
OBO = Namespace('http://purl.obolibrary.org/obo/')
OBO = Namespace("http://purl.obolibrary.org/obo/")

MATCHES = {
# Hydrogen
Expand Down Expand Up @@ -34,7 +35,6 @@
URIRef(OEO + "OEO_00010382"): URIRef(OEO + "OEO_00010381"),
# Fossil steam reforming hydrogen with CCS
URIRef(OEO + "OEO_00010383"): URIRef(OEO + "OEO_00010381"),

### Electricity
# renewable electrical energy
URIRef(OEO + "OEO_00010384"): URIRef("http://data.europa.eu/xsp/cn2024/271600000080"),
Expand All @@ -57,8 +57,7 @@ class OpenEnergyProducts(GraphBase):
def __init__(self, graph: Graph | None = None):
logger.info("Parsing and creating Open Energy Ontology elements")
data = get_file_in_downloadable_zip_archive(
"https://openenergyplatform.org/ontology/oeo/releases/latest",
"oeo-full.owl"
"https://openenergyplatform.org/ontology/oeo/releases/latest", "oeo-full.owl"
)
self.input_graph = Graph().parse(data, format="xml")
new_graph = graph is None
Expand Down
Loading

0 comments on commit f67eae9

Please sign in to comment.