From 4b40d28dedb57b4744454037cca885c93e7a8598 Mon Sep 17 00:00:00 2001 From: Tom Gebhardt Date: Thu, 9 Nov 2023 20:30:20 +0100 Subject: [PATCH] * Tasks are queued, excecuted and updated as planed * clients would have to ask for an update from time to tim until the session is finished (see "local_requests.py") --- app/celery/automated_tasks/__init__.py | 7 +- app/celery/automated_tasks/csh_fair.py | 154 ++++++++++++++++++++++ app/celery/automated_tasks/csh_helpers.py | 14 ++ app/metrics/metrics.csv | 13 +- app/models/session.py | 6 - app/routers/csh_router.py | 43 ------ tests/local_request.py | 15 ++- 7 files changed, 197 insertions(+), 55 deletions(-) create mode 100644 app/celery/automated_tasks/csh_fair.py create mode 100644 app/celery/automated_tasks/csh_helpers.py delete mode 100644 app/routers/csh_router.py diff --git a/app/celery/automated_tasks/__init__.py b/app/celery/automated_tasks/__init__.py index ac527ce..82a066c 100644 --- a/app/celery/automated_tasks/__init__.py +++ b/app/celery/automated_tasks/__init__.py @@ -1,9 +1,12 @@ from .f1_model_persistent_identifier_task import f1_model_persistent_identifier from .f4_model_metadata_harvestable_task import f4_model_metadata_harvestable -from .csh_f2_persistent_identifier_task import csh_f2_persistent_identifier +from .csh_fair import csh_f1_2_globally_unique_identifier, csh_a1_contains_access_information + + __all__ = [ f1_model_persistent_identifier, f4_model_metadata_harvestable, - csh_f2_persistent_identifier + csh_f1_2_globally_unique_identifier, + csh_a1_contains_access_information ] diff --git a/app/celery/automated_tasks/csh_fair.py b/app/celery/automated_tasks/csh_fair.py new file mode 100644 index 0000000..e769595 --- /dev/null +++ b/app/celery/automated_tasks/csh_fair.py @@ -0,0 +1,154 @@ +import re +import requests + +#from typing import Optional + +from .csh_helpers import check_route +from app.dependencies.settings import get_settings +from ... import models + +from app.celery.celery_app import app + +config = get_settings() + +def is_doi(identifier): + doi_pattern = r'^10\.\d{4,9}/[-._;()/:A-Z0-9]+$' + # Use the re.match function to check if the string matches the pattern + return bool(re.match(doi_pattern, identifier)) + + + +def incoperate_results(task_dict: dict, result: ["success","failed","warning"], test: bool): + print("incoperate results!") + session_id = task_dict["session_id"] + task_id = task_dict["id"] + + print(config.celery_key) + status = models.TaskStatusIn( + status=models.TaskStatus(result), force_update=config.celery_key + ) + + print(f"Task status computed: {result}") + # Needs to send a request for the task to be updated + if test: + print("test is true") + return models.TaskStatus(result) + else: + url = f"http://{config.backend_url}:{config.backend_port}/session/{session_id}/tasks/{task_id}" + print(f"Patching {url}") + requests.patch( + url, + json=status.dict(), + ) + + # Does not work because celery does not have access to fair_indicators + # routers.update_task(session_id, task_id, status) + + # Works, but does not trigger updating of children + # redis_app.json().set(f"session:{session_id}", f".tasks.{task_id}.status", obj=result) + + + +@app.task +def csh_f1_2_globally_unique_identifier( + task_dict: dict, data: dict, test: bool = False +): + print("f1_2_glob") + """ + Representation of celery task to evaluate an assessment. + These celery tasks should be in the format: + ``` + def assessment_task(task_dict: dict, data: dict) -> None: + session_id = task_dict["session_id"] + task_id = task_dict["id"] + + # Code to get the final TaskStatus + ... + + status = models.TaskStatusIn(status=models.TaskStatus(result), force_update=config.celery_key) + requests.patch( + f"http://localhost:8000/session/{session_id}/tasks/{task_id}, + json=status + ) + + :param task_dict: Task dict representation + :param data: (Meta)Data to evaluate + :return: None + """ + + + identifier = check_route(data, ["resource", "resource_identifier"]) + + #could also retrive "type" from data instead of using .startswith + + if(is_doi(identifier)): + result = "success" + elif(identifier.startswith("DRKS")): + result = "success" + else: + result = "failed" + + incoperate_results(task_dict, result, test) + + +# @app.task +# def csh_f1_1_persistent_identifier(task_dict: dict, data: dict, test: bool = False): + +# """ +# Task to test weather an identifier is persistent. +# Since the identifier is either unique for CSH, it is persistent +# """ + +# result = "success" + +# incoperate_results(task_dict, result, test) + +# @app.task +# def csh_f2_rich_metadata_provided(task_dict: dict, data: dict, test: bool = False): +# """ +# The nature of the CSH with all its mandatory fields implies a success +# """ + +# result = "success" + +# incoperate_results(task_dict, result, test) + +# @app.task +# def csh_f3_id_of_do_included(task_dict: dict, data: dict, test: bool = False): +# """ +# we are unsure about this indicator. At the moment we consider it as a fail +# """ + +# result = "success" + +# incoperate_results(task_dict, result, test) + + +# @app.task +# def csh_f4_metadata_indexed(task_dict: dict, data: dict, test: bool = False): +# """ +# since the data is send to out tool as a json it clearly is indexed +# """ + +# result = "success" + +# incoperate_results(task_dict, result, test) + +@app.task +def csh_a1_contains_access_information(task_dict: dict, data: dict, test: bool = False): + """ + 1. check if there is a data sharing plan (study_data_sharing_plan_generally) + 2. if yes -> evaluate ‘study_data_sharing_plan_time_frame’ and ‘study_data_sharing_plan_access_criteria’ somehow + """ + general_plan = check_route(data, ["resource","study_design","study_data_sharing_plan","study_data_sharing_plan_description"]) + print("INFO - general plan - ", general_plan) + + has_plan = general_plan == "Yes, there is a plan to make data available" + + if has_plan: + print("TODO: implent a check of the actual data sharing plan") + result = "success" + else: + result = "failed" + + incoperate_results(task_dict, result, test) \ No newline at end of file diff --git a/app/celery/automated_tasks/csh_helpers.py b/app/celery/automated_tasks/csh_helpers.py new file mode 100644 index 0000000..d6478ef --- /dev/null +++ b/app/celery/automated_tasks/csh_helpers.py @@ -0,0 +1,14 @@ +import json + +def check_route(metadata, route_keys): + current_position = json.loads(metadata) + + for key in route_keys: + if key in current_position: + + current_position = current_position[key] + else: + #if a key is missing return false + return False + #if the route exists return the value + return current_position diff --git a/app/metrics/metrics.csv b/app/metrics/metrics.csv index e9cc8bb..72c1268 100644 --- a/app/metrics/metrics.csv +++ b/app/metrics/metrics.csv @@ -83,5 +83,14 @@ "CA-RDA-R1.3-02MM","Essential","Metadata of model is expressed in compliance with a machine-understandable community standard","This indicator requires that the model metadata follows a community standard that has a machine-understandable expression","This indicator can be evaluated by verifying that the community standard used f or the metadata has a machine-understandable expression" "CA-RDA-R1.3-03MA","Important","Metadata of archive is expressed in compliance with a machine-understandable cross-community standard","This indicator requires that the archive metadata follows a cross-community standard that has a machine-understandable expression","This indicator can be evaluated by verifying that the cross-community standard used f or the metadata has a machine-understandable expression" "CA-RDA-R1.3-03MM","Important","Metadata of model is expressed in compliance with a machine-understandable cross-community standard","This indicator requires that the model metadata follows a cross-community standard that has a machine-understandable expression","This indicator can be evaluated by verifying that the cross-community standard used f or the metadata has a machine-understandable expression" -"CSH-RDA-F1-01","Important","Is a unique identifier assigned to the study","This indicator is the inital test indicator for evaluating elemts from the CSH","And some more details","??" -"CSH-RDA-F2-01","Essential","Something else","And some more details","??" \ No newline at end of file +"CSH-RDA-F1-01M","Essential","Metadata is identified by a persistent identifier","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-F1-02M","Essential","Metadata is identified by a globally unique identifier","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-F2-01M","Essential","Rich metadata is provided to allow discovery","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-F3-01M","Essential","Metadata includes the identifier for the data","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-F4-01M","Essential","Metadata is offered in such a way that it can be harvested and indexed","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A1-01M","Important","Metadata contains information to enable the user to get access to the data","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A1-02M","Essential","Metadata can be accessed manually","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A1-03M","Essential","Metadata identifier resolves to a metadata record","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A1-04M","Essential","Metadata is accessed through standardised protocol","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A1.1-01M","Essential","Metadata is accessible through a free access protocol","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" +"CSH-RDA-A2-01M","Essential","Metadata is guaranteed to remain available after data is no longer available","---Add a description about the indicator (probably adapt from RDA schema)---","Add an explanation on how this indicator is technically evaluated" \ No newline at end of file diff --git a/app/models/session.py b/app/models/session.py index 3d080f5..04c4b91 100644 --- a/app/models/session.py +++ b/app/models/session.py @@ -96,7 +96,6 @@ class SessionSubjectIn(BaseModel): @validator("subject_type", always=True) def necessary_data_provided(cls, subject_type: str, values: dict): - print(values) if subject_type is SubjectType.manual: if ( values.get("has_archive") is None @@ -205,9 +204,6 @@ def __init__(self, session: Session) -> None: if self.user_input.subject_type in [SubjectType.file, SubjectType.url]: #url is currently not supported, thus this step wouldn't be reached for URL support self.assessed_data = self.retrieve_data(self.user_input.path) elif self.user_input.subject_type is SubjectType.csh: - print("---") - print(self.user_input) - print("---") self.assessed_data = self.user_input.metadata self.create_tasks() @@ -484,7 +480,6 @@ def _get_default_task_status(self, indicator: str) -> tuple[TaskStatus, bool]: return TaskStatus(config.pmr_assessment_status[indicator]), True if indicator in config.csh_metadata_status: - print("+++++++HUHU++++++") return TaskStatus(config.csh_metadata_status[indicator]), True if indicator in config.assessment_dependencies: @@ -591,7 +586,6 @@ def start_automated_tasks(self): if self.user_input.subject_type is not SubjectType.csh: task.do_evaluate(self.assessed_data.dict()) else: - print("???????????????") task.do_evaluate(self.assessed_data) def json(self): diff --git a/app/routers/csh_router.py b/app/routers/csh_router.py deleted file mode 100644 index 69e69cd..0000000 --- a/app/routers/csh_router.py +++ /dev/null @@ -1,43 +0,0 @@ -from fastapi import APIRouter -import json -from app.models.csh import study_evaluation, Score - -csh_router = APIRouter() - - - - - -@csh_router.get("/csh/study", tags=["Study"]) -def csh_study() -> Score: #metadata, schema_version - """ - **Parameters:** - - *metadata*: json containing the metadata of a CSH entry - **Returns:** - A string for testing - """ - #### stuff for testeing - #### loads local json - json_file_path = "app/csh_schemas/examples/DRKS00027974.json" - - # Open and read the JSON file - with open(json_file_path, 'r') as json_file: - json_data = json.load(json_file) - - - #print(json_data) - - ####----------------#### - - schema_version = "3.1" - evaluation = study_evaluation(json_data, schema_version) - - score: Score - score = evaluation.evaluate() - - print("!") - print(evaluation.score) - print(score) - print("?") - return score - diff --git a/tests/local_request.py b/tests/local_request.py index 9e7514f..7b09258 100644 --- a/tests/local_request.py +++ b/tests/local_request.py @@ -1,12 +1,13 @@ import requests import json +import time print("TESTING THE SERVER") # Define the URL of the local server url = 'http://localhost:8000/session' -# Define the data you want to send in the POST request (as a dictionary) +# metadata that will be used for development metadata = { "link": None, "resource": { @@ -346,7 +347,17 @@ # Check the response if response.status_code == 200: print("Request was successful.") - print("Response:", response.json()['tasks']) + + print("Response:", response.json()['status']) + time.sleep(3) + print("should be 3 seconds delayed") + #get session to look if its finished + while response.json()['status'] != 'finished': + time.sleep(3) + response = requests.get(url + '/' + response.json()['id'], ) + print('status: ', response.json()['status']) + + print(response.json()) else: print("Request failed with status code:", response.status_code) print(response.text)