Skip to content

Commit

Permalink
Merge branch 'libpitt/118-readme-ontology'
Browse files Browse the repository at this point in the history
  • Loading branch information
maxsibilla committed Aug 17, 2023
2 parents e55036d + 89e20e1 commit 54971e4
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 143 deletions.
2 changes: 1 addition & 1 deletion src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from hubmap_commons import neo4j_driver
from atlas_consortia_commons.ubkg import initialize_ubkg
from atlas_consortia_commons.rest import get_http_exceptions_classes, abort_err_handler
from atlas_consortia_commons.ubkg.ubkg_sdk import init_ontology

from routes.auth import auth_blueprint
from routes.status import status_blueprint
Expand All @@ -21,7 +22,6 @@

# Local Modules
from lib.file_upload_helper import UploadFileHelper
from lib.ontology import init_ontology

# Set logging format and level (default is warning)
# All the API logging is forwarded to the uWSGI server and gets written into the log file `uwsgi-ingest-api.log`
Expand Down
122 changes: 9 additions & 113 deletions src/lib/ontology.py
Original file line number Diff line number Diff line change
@@ -1,122 +1,18 @@
import logging

from atlas_consortia_commons.object import build_enum_class
from atlas_consortia_commons.ubkg import get_from_node
from atlas_consortia_commons.string import to_snake_case_upper, equals
import base64

from atlas_consortia_commons.ubkg.ubkg_sdk import UbkgSDK
from flask import current_app

logger = logging.getLogger(__name__)

def _get_obj_type(in_enum, as_data_dict: bool = False):
if as_data_dict:
return 'dict'
else:
return 'enum' if in_enum else 'class'


def _get_response(obj, url_params=None):
    """Resolve *obj* against the app's UBKG service and return the raw response.

    A plain string (or a node without an ``endpoint``) is looked up as a
    valueset; a node with an endpoint goes through the endpoint API, with an
    extra keyed lookup when ``url_params`` are supplied.
    """
    endpoint = get_from_node(obj, 'endpoint')
    if type(obj) is str or not endpoint:
        return current_app.ubkg.get_ubkg_valueset(obj)
    if url_params is None:
        return current_app.ubkg.get_ubkg_by_endpoint(obj)
    # Derive a key from the params — base64 with '=' padding stripped
    # (presumably a cache/lookup key; verify against the ubkg client).
    key = base64.b64encode(url_params.encode('utf-8')).decode('utf-8')
    key = key.replace('=', '')
    return current_app.ubkg.get_ubkg(obj, key, f"{endpoint}{url_params}")


def _build_enum_class(name: str, obj, key: str = 'term', val_key: str = None, prop_callback=to_snake_case_upper,
                      obj_type: str = 'class', data_as_val=False, url_params=None):
    """Fetch the UBKG data for *obj* and materialize it as an enum/class/dict.

    Thin wrapper: resolution is delegated to ``_get_response`` and the actual
    construction to ``build_enum_class``.
    """
    data = _get_response(obj, url_params=url_params)
    return build_enum_class(
        name,
        data,
        key,
        val_key=val_key,
        prop_callback=prop_callback,
        obj_type=obj_type,
        data_as_val=data_as_val,
    )


def entities(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'Entities' ontology object from ``current_app.ubkg.entities``."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('Entities', current_app.ubkg.entities, obj_type=obj_type)


def specimen_categories(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'SpecimenCategories' ontology object from UBKG."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('SpecimenCategories', current_app.ubkg.specimen_categories,
                             obj_type=obj_type)


def organ_types(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'OrganTypes' ontology object, keyed by 'rui_code' with 'term' values."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('OrganTypes', current_app.ubkg.organ_types,
                             key='rui_code', val_key='term', obj_type=obj_type)


def assay_types(in_enum: bool = False, as_data_dict: bool = False,
                prop_callback=to_snake_case_upper, data_as_val=False, url_params=None):
    """Build the 'AssayTypes' ontology object, keyed by 'data_type'.

    Extra knobs (``prop_callback``, ``data_as_val``, ``url_params``) are
    forwarded untouched to ``_build_enum_class``.
    """
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('AssayTypes', current_app.ubkg.assay_types,
                             key='data_type', obj_type=obj_type,
                             prop_callback=prop_callback, data_as_val=data_as_val,
                             url_params=url_params)


def source_types(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'SourceTypes' ontology object from UBKG."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('SourceTypes', current_app.ubkg.source_types,
                             obj_type=obj_type)



def init_ontology():
    """Warm up the UBKG-backed ontology lookups at application start.

    Each builder is invoked once for its side effects (fetching/caching the
    UBKG responses); the constructed objects are discarded here.
    """
    for build in (specimen_categories, organ_types, entities, assay_types, source_types):
        build()

def enum_val_lower(val):
    """Return the enum member's value lower-cased.

    Intended as a ``cb=`` callback when ontology enums are flattened to lists.
    """
    value = val.value
    return value.lower()

def ubkg_sever():
    """Return the UBKG server base URL from the Flask app config.

    NOTE(review): the name looks like a typo for ``ubkg_server``, but other
    helpers in this module call it under this spelling, so it is kept.
    """
    config = current_app.config
    return config['UBKG_SERVER']

def get_valueset_ep(code):
    """Return the full valueset endpoint URL with ``{code}`` substituted."""
    template = f"{ubkg_sever()}{current_app.config['UBKG_ENDPOINT_VALUESET']}"
    return template.format(code=code)

def get_organ_types_ep():
    """Return the absolute URL of the organ-types endpoint."""
    endpoint = get_from_node(current_app.ubkg.organ_types, 'endpoint')
    return f"{ubkg_sever()}{endpoint}"

def get_assay_types_ep():
    """Return the absolute URL of the assay-types endpoint."""
    endpoint = get_from_node(current_app.ubkg.assay_types, 'endpoint')
    return f"{ubkg_sever()}{endpoint}"


# NOTE(review): this span is diff residue — the commit removes the old
# Ontology facade class and replaces it with a thin UbkgSDK subclass, but
# the removed and added lines are interleaved here without +/- markers, so
# this span is NOT valid standalone Python. The comments below tag which
# side of the diff each run of lines appears to belong to (based on the
# hunk header "-1,122 +1,18" and the new UbkgSDK import); the code itself
# is left byte-identical.
# --- old (removed): facade delegating to the module-level builders ---
class Ontology:
@staticmethod
def entities(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(entities(as_arr, as_data_dict), as_arr, cb)
# --- new (added): merged body of get_organ_types_ep via UbkgSDK ---
return UbkgSDK.get_endpoint(current_app.ubkg.organ_types)

# --- old (removed) ---
@staticmethod
def assay_types(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper,
data_as_val=False, url_params=None):
return Ontology._as_list_or_class(assay_types(as_arr, as_data_dict, prop_callback,
data_as_val=data_as_val, url_params=url_params), as_arr, cb)

@staticmethod
def assay_types_ext(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper,
data_as_val=False):
return Ontology.assay_types(as_arr=as_arr, cb=cb, as_data_dict=as_data_dict, data_as_val=data_as_val,
prop_callback=prop_callback, url_params='&dataset_provider=external')

@staticmethod
def specimen_categories(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(specimen_categories(as_arr, as_data_dict), as_arr, cb)

@staticmethod
def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(organ_types(as_arr, as_data_dict), as_arr, cb)
# --- new (added): merged get_assay_types_ep delegates to UbkgSDK ---
def get_assay_types_ep():
return UbkgSDK.get_endpoint(current_app.ubkg.assay_types)

# --- old (removed); the @staticmethod lines here are ambiguous between the
# two versions — confirm against the merged file ---
@staticmethod
def source_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(source_types(as_arr, as_data_dict), as_arr, cb)

# --- new (added): in the merged file Ontology becomes a UbkgSDK subclass ---
class Ontology(UbkgSDK):
@staticmethod
# --- old (removed) helper ---
def _as_list_or_class(obj, as_arr: bool = False, cb=str):
return obj if not as_arr else list(map(cb, obj))

# --- new (added): assay_types_ext configured through Ontology.Ops ---
def assay_types_ext():
Ontology.Ops.key = 'data_type'
Ontology.Ops.url_params = '&dataset_provider=external'
return Ontology.transform_ontology(current_app.ubkg.assay_types, 'AssayTypesExt')
2 changes: 1 addition & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requests==2.25.1
# Default is main branch specified in docker-compose.development.yml if not set
# git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons
hubmap-commons==2.1.9
atlas-consortia-commons==1.0.4

# Testing
pytest==7.3.1
39 changes: 20 additions & 19 deletions src/routes/entity_CRUD/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from hubmap_commons import string_helper
from atlas_consortia_commons.rest import *
from atlas_consortia_commons.string import equals
from atlas_consortia_commons.object import includes
from atlas_consortia_commons.object import includes, enum_val_lower

from lib.file_upload_helper import UploadFileHelper
from lib.datacite_doi_helper import DataCiteDoiHelper
Expand All @@ -31,7 +31,7 @@
from routes.entity_CRUD.dataset_helper import DatasetHelper
from routes.entity_CRUD.constraints_helper import *
from routes.auth import get_auth_header, get_auth_header_dict
from lib.ontology import Ontology, enum_val_lower, get_organ_types_ep, get_assay_types_ep
from lib.ontology import Ontology, get_organ_types_ep, get_assay_types_ep
from lib.file import get_csv_records, get_base_path, check_upload, ln_err


Expand Down Expand Up @@ -78,7 +78,7 @@ def create_dataset():

@entity_CRUD_blueprint.route('/sources/bulk/validate', methods=['POST'])
def bulk_sources_upload_and_validate():
    """Validate a bulk-upload file of sources.

    Delegates to ``_bulk_upload_and_validate`` with the SOURCE entity type.
    """
    # Resolved diff residue: the pre-merge Ontology.entities() call and the
    # post-merge Ontology.ops().entities() call were both present; keep the
    # post-merge form, consistent with the rest of this commit.
    return _bulk_upload_and_validate(Ontology.ops().entities().SOURCE)


@entity_CRUD_blueprint.route('/sources/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -121,7 +121,7 @@ def create_sources_from_bulk():

@entity_CRUD_blueprint.route('/samples/bulk/validate', methods=['POST'])
def bulk_samples_upload_and_validate():
    """Validate a bulk-upload file of samples.

    Delegates to ``_bulk_upload_and_validate`` with the SAMPLE entity type.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() form over the
    # removed Ontology.entities() call.
    return _bulk_upload_and_validate(Ontology.ops().entities().SAMPLE)


@entity_CRUD_blueprint.route('/samples/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -171,7 +171,7 @@ def create_samples_from_bulk():

@entity_CRUD_blueprint.route('/datasets/bulk/validate', methods=['POST'])
def bulk_datasets_upload_and_validate():
    """Validate a bulk-upload file of datasets.

    Delegates to ``_bulk_upload_and_validate`` with the DATASET entity type.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() form over the
    # removed Ontology.entities() call.
    return _bulk_upload_and_validate(Ontology.ops().entities().DATASET)


@entity_CRUD_blueprint.route('/datasets/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -623,7 +623,7 @@ def publish_datastage(identifier):
entity_dict: dict = vars(entity)
# data_type_edp: List[str] = \
# get_data_type_of_external_dataset_providers(current_app.config['UBKG_WEBSERVICE_URL'])
data_type_edp = list(Ontology.assay_types_ext(as_data_dict=True).values())
data_type_edp = list(Ontology.ops(as_data_dict=True).assay_types_ext().values())
entity_lab_processed_data_types: List[str] = \
[i for i in entity_dict.get('data_types') if i in data_type_edp]
has_entity_lab_processed_data_type: bool = len(entity_lab_processed_data_types) > 0
Expand Down Expand Up @@ -651,6 +651,8 @@ def publish_datastage(identifier):
if asset_dir_exists:
ingest_helper.relink_to_public(dataset_uuid)

acls_cmd = ingest_helper.set_dataset_permissions(dataset_uuid, dataset_group_uuid, data_access_level,
True, no_indexing_and_acls)

auth_tokens = auth_helper.getAuthorizationTokens(request.headers)
entity_instance = EntitySdk(token=auth_tokens, service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
Expand Down Expand Up @@ -745,11 +747,11 @@ def _bulk_upload_and_validate(entity):
csv_records = get_csv_records(file_location)
headers, records = itemgetter('headers', 'records')(csv_records)

if entity == Ontology.entities().SOURCE:
if entity == Ontology.ops().entities().SOURCE:
valid_file = validate_sources(headers, records)
elif entity == Ontology.entities().SAMPLE:
elif entity == Ontology.ops().entities().SAMPLE:
valid_file = validate_samples(headers, records, header)
elif entity == Ontology.entities().DATASET:
elif entity == Ontology.ops().entities().DATASET:
records = _format_dataset_records(records)
valid_file = validate_datasets(headers, records, header)
else:
Expand Down Expand Up @@ -864,7 +866,7 @@ def is_invalid_doi(protocol):
def validate_sources(headers, records):
error_msg = []
file_is_valid = True
allowed_source_types = Ontology.source_types(True, enum_val_lower)
allowed_source_types = Ontology.ops(as_arr=True, cb=enum_val_lower).source_types()

required_headers = ['lab_id', 'source_type', 'selection_protocol', 'lab_notes']
for field in required_headers:
Expand Down Expand Up @@ -944,12 +946,12 @@ def validate_samples(headers, records, header):
file_is_valid = False
error_msg.append(_common_ln_errs(2, field))

allowed_categories = Ontology.specimen_categories(True, enum_val_lower)
allowed_categories = Ontology.ops(as_arr=True, cb=enum_val_lower).specimen_categories()
# Get the ontology classes
SpecimenCategories = Ontology.specimen_categories()
Entities = Ontology.entities()
SpecimenCategories = Ontology.ops().specimen_categories()
Entities = Ontology.ops().entities()

organ_types_codes = list(Ontology.organ_types(as_data_dict=True).keys())
organ_types_codes = list(Ontology.ops(as_data_dict=True, key='rui_code', val_key='term').organ_types().keys())

rownum = 0
valid_ancestor_ids = []
Expand Down Expand Up @@ -1091,8 +1093,7 @@ def validate_datasets(headers, records, header):
file_is_valid = False
error_msg.append(_common_ln_errs(2, field))


assay_types = list(Ontology.assay_types(as_data_dict=True, prop_callback=None).keys())
assay_types = list(Ontology.ops(as_data_dict=True, prop_callback=None).assay_types().keys())

rownum = 0
entity_constraint_list = []
Expand Down Expand Up @@ -1170,7 +1171,7 @@ def validate_datasets(headers, records, header):
if data_types_valid:
sub_type = get_as_list(data_types)

entity_to_validate = build_constraint_unit(Ontology.entities().DATASET, sub_type)
entity_to_validate = build_constraint_unit(Ontology.ops().entities().DATASET, sub_type)

try:
entity_constraint_list = append_constraints_list(entity_to_validate, ancestor_dict, header, entity_constraint_list, ancestor_id)
Expand Down Expand Up @@ -1230,7 +1231,7 @@ def validate_ancestor_id(header, ancestor_id, error_msg, rownum, valid_ancestor_


def append_constraints_list(entity_to_validate, ancestor_dict, header, entity_constraint_list, ancestor_id):
Entities = Ontology.entities()
Entities = Ontology.ops().entities()
ancestor_entity_type = ancestor_dict['type'].lower()
url = commons_file_helper.ensureTrailingSlashURL(current_app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + ancestor_id

Expand All @@ -1242,7 +1243,7 @@ def append_constraints_list(entity_to_validate, ancestor_dict, header, entity_co

if equals(ancestor_entity_type, Entities.SAMPLE):
sub_type = get_as_list(ancestor_result['sample_category'])
if equals(ancestor_result['sample_category'], Ontology.specimen_categories().ORGAN):
if equals(ancestor_result['sample_category'], Ontology.ops().specimen_categories().ORGAN):
sub_type_val = get_as_list(ancestor_result['organ'])

ancestor_to_validate = build_constraint_unit(ancestor_entity_type, sub_type, sub_type_val)
Expand Down
18 changes: 9 additions & 9 deletions src/routes/validation/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def create_tsv_from_path(path, row):


def determine_schema(entity_type, sub_type):
if equals(entity_type, Ontology.entities().SOURCE):
if equals(entity_type, Ontology.ops().entities().SOURCE):
schema = 'murine-source'
elif equals(entity_type, Ontology.entities().SAMPLE):
elif equals(entity_type, Ontology.ops().entities().SAMPLE):
if not sub_type:
return rest_bad_req("`sub_type` for schema name required.")
schema = f"sample-{sub_type}"
Expand All @@ -114,27 +114,27 @@ def _get_response(metadata, entity_type, sub_type, validate_uuids, pathname=None


def get_col_id_name_by_entity_type(entity_type):
    """Return the id-column name used for the given entity type.

    Samples use 'sample_id'; any other type falls back to 'source_id'.
    """
    # Resolved diff residue: the span contained both the pre-merge
    # Ontology.entities() and post-merge Ontology.ops().entities() condition;
    # the post-merge form is kept.
    if equals(entity_type, Ontology.ops().entities().SAMPLE):
        return 'sample_id'
    return 'source_id'


def get_sub_type_name_by_entity_type(entity_type):
    """Return the sub-type field name used for the given entity type.

    Samples use 'sample_category'; any other type falls back to 'source_type'.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() condition over
    # the removed Ontology.entities() one.
    if equals(entity_type, Ontology.ops().entities().SAMPLE):
        return 'sample_category'
    return 'source_type'


def supported_metadata_sub_types(entity_type):
    """Return the sub-types that accept metadata for the given entity type.

    Sources support only MOUSE; everything else gets the BLOCK, SECTION and
    SUSPENSION specimen categories.
    """
    # Resolved diff residue: both the pre-merge Ontology.* and post-merge
    # Ontology.ops().* variants were present in this span; the post-merge
    # form is kept throughout.
    if equals(entity_type, Ontology.ops().entities().SOURCE):
        return [Ontology.ops().source_types().MOUSE]
    return [
        Ontology.ops().specimen_categories().BLOCK,
        Ontology.ops().specimen_categories().SECTION,
        Ontology.ops().specimen_categories().SUSPENSION,
    ]

def validate_records_uuids(records, entity_type, sub_type, pathname):
errors = []
Expand Down

0 comments on commit 54971e4

Please sign in to comment.