Skip to content

Commit

Permalink
Merge pull request #8 from odissei-data/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
FjodorvRijsselberg authored Apr 5, 2023
2 parents da61482 + 4860fba commit 2463663
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 68 deletions.
3 changes: 0 additions & 3 deletions .env

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
env/
venv/
Expand Down
3 changes: 1 addition & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,4 @@ services:
- "./${APPLICATION_DIR}:/root/${APPLICATION_DIR}"
ports:
- "${PORT}:${PORT}"
environment:
PYTHONPATH: "/root/src/"
env_file: .env
14 changes: 14 additions & 0 deletions dot_env_example
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Container setup
CONTAINER_NAME=metadata-enhancer
PORT=7070
APPLICATION_DIR=src
PYTHONPATH=/root/src/

# URLs
KEYWORD_VOCABULARY_URL='https://fuseki.odissei.nl/skosmos/sparql'
VARIABLE_VOCABULARY_URL='https://fuseki.odissei.nl/skosmos/sparql'
GRLC_API_URL='https://grlc.odissei.nl/api-git/odissei-data/grlc/'

# Endpoints
KEYWORD_ENDPOINT='matchElsstTermForKeyword'
VARIABLE_ENDPOINT='getCbsVarUri'
14 changes: 13 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metadata-enhancer"
version = "0.1.1"
version = "1.0.0"
description = "A project allowing one to set up a FastAPI based Microservice, using cookiecutter"
authors = ["Thomas van Erven"]
include = ["src"]
Expand All @@ -13,6 +13,7 @@ fastapi = "^0.85.0"
pytest = "^7.1.3"
requests = "^2.28.1"
dynaconf = "^3.1.11"
jmespath = "^1.0.1"

[tool.poetry.dev-dependencies]
ipython = "^8.5.0"
Expand Down
33 changes: 11 additions & 22 deletions src/enhancers/KeywordEnhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@


class KeywordEnhancer(MetadataEnhancer):
""" This class can be used to enhance the keywords in DV metadata.
"""
""" This class can be used to enhance the keywords in DV metadata. """
def __init__(self, metadata: dict, endpoint: str, sparql_endpoint: str):
"""
The ELSST Topics metadata block is created to add the matched terms to.
Expand All @@ -23,52 +21,43 @@ def enhance_metadata(self):
keywords = self.get_value_from_metadata('keyword', 'citation')

for keyword_dict in keywords:
keyword = _try_for_key(keyword_dict, ['keywordValue', 'value'],
'keywordValue field value not found in'
' keyword metadata block')
keyword = _try_for_key(keyword_dict, 'keywordValue.value')

terms_dict = self.query_matched_terms(
keyword,
)

terms = _try_for_key(
terms_dict,
['results', 'bindings'],
'grlc endpoint returned badly formatted JSON.'
)
terms = _try_for_key(terms_dict, 'results.bindings')
topic = self.create_elsst_topic_keyword(keyword)
self.add_terms_to_metadata(terms, topic)

def add_terms_to_metadata(self, terms: list, topic: dict):
""" Goes through all the retrieved terms and adds them to the metadata.
For every term we add a URI and a label to the matched keyword in
the ELSST Topics block.
There is a limit of 3 terms that can be added for a single keyword.
The metadata block contains fields for elsstVarUri1, elsstVarUri2,
and elsstVarUri3. The same goes for the labels.
For every term we add a URI and a label to the matched keyword in
the ELSST Topics block.
There is a limit of 3 terms that can be added for a single keyword.
The metadata block contains fields for elsstVarUri1, elsstVarUri2,
and elsstVarUri3. The same goes for the labels.
:param terms: The terms matched to a specific keyword.
:param topic: The topic field that keyword is in.
"""

max_terms = min(len(terms), 3)
for i in range(max_terms):
counter = i+1
counter = i + 1
self.add_term_uri(terms[i], counter, topic)
self.add_term_label(terms[i], counter, topic)

def add_term_uri(self, term: dict, counter: int, topic: dict):
uri = _try_for_key(term, ['iri', 'value'],
'No uri found for ELSST term')
uri = _try_for_key(term, 'iri.value')
uri_type_name = f'elsstVarUri{counter}'
self.add_term_to_metadata_field(topic, uri_type_name, uri)

def add_term_label(self, term: dict, counter: int, topic: dict):
label = _try_for_key(term, ['lbl', 'value'],
'No label found for ELSST term')
label = _try_for_key(term, 'lbl.value')
label_type_name = f'elsstVarLabel{counter}'
self.add_term_to_metadata_field(topic, label_type_name, label)

Expand Down
16 changes: 8 additions & 8 deletions src/enhancers/MetadataEnhancer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import os

import requests
from fastapi import HTTPException

from .utils import _try_for_key

GRLC_API_URL = os.environ['GRLC_API_URL']


class MetadataEnhancer:
""" A super class used for enhancing Dataverse metadata
""" A super class used for enhancing Dataverse metadata.
The MetadataEnhancer is a class that describes the steps for enhancement.
A class that implements MetadataEnhancer will need to mainly implement the
Expand All @@ -25,8 +29,7 @@ def __init__(self, metadata: dict, endpoint: str, sparql_endpoint: str):
self.sparql_endpoint = sparql_endpoint
self.metadata_blocks = _try_for_key(
metadata,
['datasetVersion', 'metadataBlocks'],
'Metadata does not contain datasetVersion or metadataBlocks key'
'datasetVersion.metadataBlocks',
)

@property
Expand All @@ -45,9 +48,7 @@ def get_value_from_metadata(self, metadata_field_name: str,
"""
fields = _try_for_key(
self.metadata_blocks,
[metadata_block, 'fields'],
'variableInformation metadata block not found. '
'JSON might be formatted incorrectly.'
f'{metadata_block}.fields'
)

metadata_field = next((field for field in fields if
Expand Down Expand Up @@ -78,8 +79,7 @@ def query_matched_terms(self, value_to_match: str) -> dict:
'endpoint': self.sparql_endpoint,
}

url = 'https://grlc.odissei.nl/api-git/odissei-data/grlc/' \
+ self.endpoint
url = GRLC_API_URL + self.endpoint
response = requests.get(
url,
params=params,
Expand Down
12 changes: 3 additions & 9 deletions src/enhancers/VariableEnhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,13 @@ def enhance_metadata(self):
'variableInformation')

for variable_dict in variables:
variable = _try_for_key(variable_dict, ['variableName', 'value'],
'variableName field value not found')
variable = _try_for_key(variable_dict, 'variableName.value')

terms_dict = self.query_matched_terms(
variable
)

terms = _try_for_key(
terms_dict,
['results', 'bindings'],
'grlc endpoint returned badly formatted JSON.'
)
terms = _try_for_key(terms_dict, 'results.bindings')
self.add_terms_to_metadata(terms, variable_dict)

def add_terms_to_metadata(self, terms: list, variable_dict: dict):
Expand All @@ -46,7 +41,6 @@ def add_terms_to_metadata(self, terms: list, variable_dict: dict):
if not terms:
return
term = terms[0]
uri = _try_for_key(term, ['iri', 'value'],
'No uri found for ELSST term')
uri = _try_for_key(term, 'iri.value')
variable_type_name = 'variableVocabularyURI'
self.add_term_to_metadata_field(variable_dict, variable_type_name, uri)
25 changes: 7 additions & 18 deletions src/enhancers/utils.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
import jmespath
from fastapi import HTTPException
from jmespath.exceptions import JMESPathError


def _try_for_key(dictionary: dict, key_path: list, exception_detail: str):
def _try_for_key(dictionary: dict, key_path: str):
""" A function to retrieve a value from a nested dictionary.
The function first sets value equal to dictionary and then loops through
the key_path list, attempting to retrieve the value at each key.
If the value cannot be retrieved due to a KeyError or TypeError,
a HTTPException with a status code of 400 and the provided error message
will be raised.
:param dictionary: A dictionary object to traverse to retrieve a value.
:param key_path: The keys to be traversed to retrieve a value.
:param exception_detail: A string containing a custom error message.
:return: The value at the end of the key path in the dictionary.
"""
value = dictionary
for key in key_path:
try:
value = value[key]
except (KeyError, TypeError):
raise HTTPException(status_code=422, detail=exception_detail)
return value
try:
return jmespath.search(key_path, dictionary)
except JMESPathError as error:
raise HTTPException(status_code=422, detail=str(error))
15 changes: 11 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from fastapi import FastAPI

from enhancers.KeywordEnhancer import KeywordEnhancer
Expand All @@ -7,6 +9,11 @@

app = FastAPI()

KEYWORD_ENDPOINT = os.environ['KEYWORD_ENDPOINT']
VARIABLE_ENDPOINT = os.environ['VARIABLE_ENDPOINT']
KEYWORD_VOCABULARY_URL = os.environ['KEYWORD_VOCABULARY_URL']
VARIABLE_VOCABULARY_URL = os.environ['VARIABLE_VOCABULARY_URL']


@app.get("/version", tags=["Version"])
async def info():
Expand All @@ -19,8 +26,8 @@ async def dataverse_keyword_enhancer(
enhancer_input: EnhancerInput) -> dict:
keyword_enhancer = KeywordEnhancer(
enhancer_input.metadata,
'matchElsstTermForKeyword',
'https://fuseki.odissei.nl/skosmos/sparql'
KEYWORD_ENDPOINT,
KEYWORD_VOCABULARY_URL
)
keyword_enhancer.enhance_metadata()
return keyword_enhancer.metadata
Expand All @@ -30,8 +37,8 @@ async def dataverse_keyword_enhancer(
async def dataverse_metadata_enhancer(enhancer_input: EnhancerInput) -> dict:
variable_enhancer = VariableEnhancer(
enhancer_input.metadata,
'getCbsVarUri',
'https://fuseki.odissei.nl/skosmos/sparql'
VARIABLE_ENDPOINT,
VARIABLE_VOCABULARY_URL
)
variable_enhancer.enhance_metadata()
return variable_enhancer.metadata

0 comments on commit 2463663

Please sign in to comment.