Skip to content

Commit

Permalink
Merge branch 'libpitt/118-readme-ontology'
Browse files Browse the repository at this point in the history
  • Loading branch information
maxsibilla committed Aug 17, 2023
2 parents e55036d + 89e20e1 commit 54971e4
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 143 deletions.
2 changes: 1 addition & 1 deletion src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from hubmap_commons import neo4j_driver
from atlas_consortia_commons.ubkg import initialize_ubkg
from atlas_consortia_commons.rest import get_http_exceptions_classes, abort_err_handler
from atlas_consortia_commons.ubkg.ubkg_sdk import init_ontology

from routes.auth import auth_blueprint
from routes.status import status_blueprint
Expand All @@ -21,7 +22,6 @@

# Local Modules
from lib.file_upload_helper import UploadFileHelper
from lib.ontology import init_ontology

# Set logging format and level (default is warning)
# All the API logging is forwarded to the uWSGI server and gets written into the log file `uwsgi-ingest-api.log`
Expand Down
122 changes: 9 additions & 113 deletions src/lib/ontology.py
Original file line number Diff line number Diff line change
@@ -1,122 +1,18 @@
import logging

from atlas_consortia_commons.object import build_enum_class
from atlas_consortia_commons.ubkg import get_from_node
from atlas_consortia_commons.string import to_snake_case_upper, equals
import base64

from atlas_consortia_commons.ubkg.ubkg_sdk import UbkgSDK
from flask import current_app

logger = logging.getLogger(__name__)

def _get_obj_type(in_enum, as_data_dict: bool = False):
if as_data_dict:
return 'dict'
else:
return 'enum' if in_enum else 'class'


def _get_response(obj, url_params=None):
    """Resolve *obj* against the app's UBKG service and return the raw response.

    A plain string (or a node without an ``endpoint``) is looked up as a
    valueset; a node with an endpoint goes through the endpoint API, with an
    extra keyed lookup when ``url_params`` are supplied.
    """
    endpoint = get_from_node(obj, 'endpoint')
    if type(obj) is str or not endpoint:
        return current_app.ubkg.get_ubkg_valueset(obj)
    if url_params is None:
        return current_app.ubkg.get_ubkg_by_endpoint(obj)
    # Derive a key from the params — base64 with '=' padding stripped
    # (presumably a cache/lookup key; verify against the ubkg client).
    key = base64.b64encode(url_params.encode('utf-8')).decode('utf-8')
    key = key.replace('=', '')
    return current_app.ubkg.get_ubkg(obj, key, f"{endpoint}{url_params}")


def _build_enum_class(name: str, obj, key: str = 'term', val_key: str = None, prop_callback=to_snake_case_upper,
                      obj_type: str = 'class', data_as_val=False, url_params=None):
    """Fetch the UBKG data for *obj* and materialize it as an enum/class/dict.

    Thin wrapper: resolution is delegated to ``_get_response`` and the actual
    construction to ``build_enum_class``.
    """
    data = _get_response(obj, url_params=url_params)
    return build_enum_class(
        name,
        data,
        key,
        val_key=val_key,
        prop_callback=prop_callback,
        obj_type=obj_type,
        data_as_val=data_as_val,
    )


def entities(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'Entities' ontology object from ``current_app.ubkg.entities``."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('Entities', current_app.ubkg.entities, obj_type=obj_type)


def specimen_categories(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'SpecimenCategories' ontology object from UBKG."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('SpecimenCategories', current_app.ubkg.specimen_categories,
                             obj_type=obj_type)


def organ_types(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'OrganTypes' ontology object, keyed by 'rui_code' with 'term' values."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('OrganTypes', current_app.ubkg.organ_types,
                             key='rui_code', val_key='term', obj_type=obj_type)


def assay_types(in_enum: bool = False, as_data_dict: bool = False,
                prop_callback=to_snake_case_upper, data_as_val=False, url_params=None):
    """Build the 'AssayTypes' ontology object, keyed by 'data_type'.

    Extra knobs (``prop_callback``, ``data_as_val``, ``url_params``) are
    forwarded untouched to ``_build_enum_class``.
    """
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('AssayTypes', current_app.ubkg.assay_types,
                             key='data_type', obj_type=obj_type,
                             prop_callback=prop_callback, data_as_val=data_as_val,
                             url_params=url_params)


def source_types(in_enum: bool = False, as_data_dict: bool = False):
    """Build the 'SourceTypes' ontology object from UBKG."""
    obj_type = _get_obj_type(in_enum, as_data_dict)
    return _build_enum_class('SourceTypes', current_app.ubkg.source_types,
                             obj_type=obj_type)



def init_ontology():
    """Warm up the UBKG-backed ontology lookups at application start.

    Each builder is invoked once for its side effects (fetching/caching the
    UBKG responses); the constructed objects are discarded here.
    """
    for build in (specimen_categories, organ_types, entities, assay_types, source_types):
        build()

def enum_val_lower(val):
    """Return the enum member's value lower-cased.

    Intended as a ``cb=`` callback when ontology enums are flattened to lists.
    """
    value = val.value
    return value.lower()

def ubkg_sever():
    """Return the UBKG server base URL from the Flask app config.

    NOTE(review): the name looks like a typo for ``ubkg_server``, but other
    helpers in this module call it under this spelling, so it is kept.
    """
    config = current_app.config
    return config['UBKG_SERVER']

def get_valueset_ep(code):
    """Return the full valueset endpoint URL with ``{code}`` substituted."""
    template = f"{ubkg_sever()}{current_app.config['UBKG_ENDPOINT_VALUESET']}"
    return template.format(code=code)

def get_organ_types_ep():
    """Return the absolute URL of the organ-types endpoint."""
    endpoint = get_from_node(current_app.ubkg.organ_types, 'endpoint')
    return f"{ubkg_sever()}{endpoint}"

def get_assay_types_ep():
    """Return the absolute URL of the assay-types endpoint."""
    endpoint = get_from_node(current_app.ubkg.assay_types, 'endpoint')
    return f"{ubkg_sever()}{endpoint}"


# NOTE(review): this span is diff residue — the commit removes the old
# Ontology facade class and replaces it with a thin UbkgSDK subclass, but
# the removed and added lines are interleaved here without +/- markers, so
# this span is NOT valid standalone Python. The comments below tag which
# side of the diff each run of lines appears to belong to (based on the
# hunk header "-1,122 +1,18" and the new UbkgSDK import); the code itself
# is left byte-identical.
# --- old (removed): facade delegating to the module-level builders ---
class Ontology:
@staticmethod
def entities(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(entities(as_arr, as_data_dict), as_arr, cb)
# --- new (added): merged body of get_organ_types_ep via UbkgSDK ---
return UbkgSDK.get_endpoint(current_app.ubkg.organ_types)

# --- old (removed) ---
@staticmethod
def assay_types(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper,
data_as_val=False, url_params=None):
return Ontology._as_list_or_class(assay_types(as_arr, as_data_dict, prop_callback,
data_as_val=data_as_val, url_params=url_params), as_arr, cb)

@staticmethod
def assay_types_ext(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper,
data_as_val=False):
return Ontology.assay_types(as_arr=as_arr, cb=cb, as_data_dict=as_data_dict, data_as_val=data_as_val,
prop_callback=prop_callback, url_params='&dataset_provider=external')

@staticmethod
def specimen_categories(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(specimen_categories(as_arr, as_data_dict), as_arr, cb)

@staticmethod
def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(organ_types(as_arr, as_data_dict), as_arr, cb)
# --- new (added): merged get_assay_types_ep delegates to UbkgSDK ---
def get_assay_types_ep():
return UbkgSDK.get_endpoint(current_app.ubkg.assay_types)

# --- old (removed); the @staticmethod lines here are ambiguous between the
# two versions — confirm against the merged file ---
@staticmethod
def source_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
return Ontology._as_list_or_class(source_types(as_arr, as_data_dict), as_arr, cb)

# --- new (added): in the merged file Ontology becomes a UbkgSDK subclass ---
class Ontology(UbkgSDK):
@staticmethod
# --- old (removed) helper ---
def _as_list_or_class(obj, as_arr: bool = False, cb=str):
return obj if not as_arr else list(map(cb, obj))

# --- new (added): assay_types_ext configured through Ontology.Ops ---
def assay_types_ext():
Ontology.Ops.key = 'data_type'
Ontology.Ops.url_params = '&dataset_provider=external'
return Ontology.transform_ontology(current_app.ubkg.assay_types, 'AssayTypesExt')
2 changes: 1 addition & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requests==2.25.1
# Default is main branch specified in docker-compose.development.yml if not set
# git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons
hubmap-commons==2.1.9
atlas-consortia-commons==1.0.4

# Testing
pytest==7.3.1
39 changes: 20 additions & 19 deletions src/routes/entity_CRUD/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from hubmap_commons import string_helper
from atlas_consortia_commons.rest import *
from atlas_consortia_commons.string import equals
from atlas_consortia_commons.object import includes
from atlas_consortia_commons.object import includes, enum_val_lower

from lib.file_upload_helper import UploadFileHelper
from lib.datacite_doi_helper import DataCiteDoiHelper
Expand All @@ -31,7 +31,7 @@
from routes.entity_CRUD.dataset_helper import DatasetHelper
from routes.entity_CRUD.constraints_helper import *
from routes.auth import get_auth_header, get_auth_header_dict
from lib.ontology import Ontology, enum_val_lower, get_organ_types_ep, get_assay_types_ep
from lib.ontology import Ontology, get_organ_types_ep, get_assay_types_ep
from lib.file import get_csv_records, get_base_path, check_upload, ln_err


Expand Down Expand Up @@ -78,7 +78,7 @@ def create_dataset():

@entity_CRUD_blueprint.route('/sources/bulk/validate', methods=['POST'])
def bulk_sources_upload_and_validate():
    """Validate a bulk-upload file of sources.

    Delegates to ``_bulk_upload_and_validate`` with the SOURCE entity type.
    """
    # Resolved diff residue: the pre-merge Ontology.entities() call and the
    # post-merge Ontology.ops().entities() call were both present; keep the
    # post-merge form, consistent with the rest of this commit.
    return _bulk_upload_and_validate(Ontology.ops().entities().SOURCE)


@entity_CRUD_blueprint.route('/sources/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -121,7 +121,7 @@ def create_sources_from_bulk():

@entity_CRUD_blueprint.route('/samples/bulk/validate', methods=['POST'])
def bulk_samples_upload_and_validate():
    """Validate a bulk-upload file of samples.

    Delegates to ``_bulk_upload_and_validate`` with the SAMPLE entity type.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() form over the
    # removed Ontology.entities() call.
    return _bulk_upload_and_validate(Ontology.ops().entities().SAMPLE)


@entity_CRUD_blueprint.route('/samples/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -171,7 +171,7 @@ def create_samples_from_bulk():

@entity_CRUD_blueprint.route('/datasets/bulk/validate', methods=['POST'])
def bulk_datasets_upload_and_validate():
    """Validate a bulk-upload file of datasets.

    Delegates to ``_bulk_upload_and_validate`` with the DATASET entity type.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() form over the
    # removed Ontology.entities() call.
    return _bulk_upload_and_validate(Ontology.ops().entities().DATASET)


@entity_CRUD_blueprint.route('/datasets/bulk/register', methods=['POST'])
Expand Down Expand Up @@ -623,7 +623,7 @@ def publish_datastage(identifier):
entity_dict: dict = vars(entity)
# data_type_edp: List[str] = \
# get_data_type_of_external_dataset_providers(current_app.config['UBKG_WEBSERVICE_URL'])
data_type_edp = list(Ontology.assay_types_ext(as_data_dict=True).values())
data_type_edp = list(Ontology.ops(as_data_dict=True).assay_types_ext().values())
entity_lab_processed_data_types: List[str] = \
[i for i in entity_dict.get('data_types') if i in data_type_edp]
has_entity_lab_processed_data_type: bool = len(entity_lab_processed_data_types) > 0
Expand Down Expand Up @@ -651,6 +651,8 @@ def publish_datastage(identifier):
if asset_dir_exists:
ingest_helper.relink_to_public(dataset_uuid)

acls_cmd = ingest_helper.set_dataset_permissions(dataset_uuid, dataset_group_uuid, data_access_level,
True, no_indexing_and_acls)

auth_tokens = auth_helper.getAuthorizationTokens(request.headers)
entity_instance = EntitySdk(token=auth_tokens, service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
Expand Down Expand Up @@ -745,11 +747,11 @@ def _bulk_upload_and_validate(entity):
csv_records = get_csv_records(file_location)
headers, records = itemgetter('headers', 'records')(csv_records)

if entity == Ontology.entities().SOURCE:
if entity == Ontology.ops().entities().SOURCE:
valid_file = validate_sources(headers, records)
elif entity == Ontology.entities().SAMPLE:
elif entity == Ontology.ops().entities().SAMPLE:
valid_file = validate_samples(headers, records, header)
elif entity == Ontology.entities().DATASET:
elif entity == Ontology.ops().entities().DATASET:
records = _format_dataset_records(records)
valid_file = validate_datasets(headers, records, header)
else:
Expand Down Expand Up @@ -864,7 +866,7 @@ def is_invalid_doi(protocol):
def validate_sources(headers, records):
error_msg = []
file_is_valid = True
allowed_source_types = Ontology.source_types(True, enum_val_lower)
allowed_source_types = Ontology.ops(as_arr=True, cb=enum_val_lower).source_types()

required_headers = ['lab_id', 'source_type', 'selection_protocol', 'lab_notes']
for field in required_headers:
Expand Down Expand Up @@ -944,12 +946,12 @@ def validate_samples(headers, records, header):
file_is_valid = False
error_msg.append(_common_ln_errs(2, field))

allowed_categories = Ontology.specimen_categories(True, enum_val_lower)
allowed_categories = Ontology.ops(as_arr=True, cb=enum_val_lower).specimen_categories()
# Get the ontology classes
SpecimenCategories = Ontology.specimen_categories()
Entities = Ontology.entities()
SpecimenCategories = Ontology.ops().specimen_categories()
Entities = Ontology.ops().entities()

organ_types_codes = list(Ontology.organ_types(as_data_dict=True).keys())
organ_types_codes = list(Ontology.ops(as_data_dict=True, key='rui_code', val_key='term').organ_types().keys())

rownum = 0
valid_ancestor_ids = []
Expand Down Expand Up @@ -1091,8 +1093,7 @@ def validate_datasets(headers, records, header):
file_is_valid = False
error_msg.append(_common_ln_errs(2, field))


assay_types = list(Ontology.assay_types(as_data_dict=True, prop_callback=None).keys())
assay_types = list(Ontology.ops(as_data_dict=True, prop_callback=None).assay_types().keys())

rownum = 0
entity_constraint_list = []
Expand Down Expand Up @@ -1170,7 +1171,7 @@ def validate_datasets(headers, records, header):
if data_types_valid:
sub_type = get_as_list(data_types)

entity_to_validate = build_constraint_unit(Ontology.entities().DATASET, sub_type)
entity_to_validate = build_constraint_unit(Ontology.ops().entities().DATASET, sub_type)

try:
entity_constraint_list = append_constraints_list(entity_to_validate, ancestor_dict, header, entity_constraint_list, ancestor_id)
Expand Down Expand Up @@ -1230,7 +1231,7 @@ def validate_ancestor_id(header, ancestor_id, error_msg, rownum, valid_ancestor_


def append_constraints_list(entity_to_validate, ancestor_dict, header, entity_constraint_list, ancestor_id):
Entities = Ontology.entities()
Entities = Ontology.ops().entities()
ancestor_entity_type = ancestor_dict['type'].lower()
url = commons_file_helper.ensureTrailingSlashURL(current_app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + ancestor_id

Expand All @@ -1242,7 +1243,7 @@ def append_constraints_list(entity_to_validate, ancestor_dict, header, entity_co

if equals(ancestor_entity_type, Entities.SAMPLE):
sub_type = get_as_list(ancestor_result['sample_category'])
if equals(ancestor_result['sample_category'], Ontology.specimen_categories().ORGAN):
if equals(ancestor_result['sample_category'], Ontology.ops().specimen_categories().ORGAN):
sub_type_val = get_as_list(ancestor_result['organ'])

ancestor_to_validate = build_constraint_unit(ancestor_entity_type, sub_type, sub_type_val)
Expand Down
18 changes: 9 additions & 9 deletions src/routes/validation/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def create_tsv_from_path(path, row):


def determine_schema(entity_type, sub_type):
if equals(entity_type, Ontology.entities().SOURCE):
if equals(entity_type, Ontology.ops().entities().SOURCE):
schema = 'murine-source'
elif equals(entity_type, Ontology.entities().SAMPLE):
elif equals(entity_type, Ontology.ops().entities().SAMPLE):
if not sub_type:
return rest_bad_req("`sub_type` for schema name required.")
schema = f"sample-{sub_type}"
Expand All @@ -114,27 +114,27 @@ def _get_response(metadata, entity_type, sub_type, validate_uuids, pathname=None


def get_col_id_name_by_entity_type(entity_type):
    """Return the id-column name used for the given entity type.

    Samples use 'sample_id'; any other type falls back to 'source_id'.
    """
    # Resolved diff residue: the span contained both the pre-merge
    # Ontology.entities() and post-merge Ontology.ops().entities() condition;
    # the post-merge form is kept.
    if equals(entity_type, Ontology.ops().entities().SAMPLE):
        return 'sample_id'
    return 'source_id'


def get_sub_type_name_by_entity_type(entity_type):
    """Return the sub-type field name used for the given entity type.

    Samples use 'sample_category'; any other type falls back to 'source_type'.
    """
    # Resolved diff residue: kept the post-merge Ontology.ops() condition over
    # the removed Ontology.entities() one.
    if equals(entity_type, Ontology.ops().entities().SAMPLE):
        return 'sample_category'
    return 'source_type'


def supported_metadata_sub_types(entity_type):
    """Return the sub-types that accept metadata for the given entity type.

    Sources support only MOUSE; everything else gets the BLOCK, SECTION and
    SUSPENSION specimen categories.
    """
    # Resolved diff residue: both the pre-merge Ontology.* and post-merge
    # Ontology.ops().* variants were present in this span; the post-merge
    # form is kept throughout.
    if equals(entity_type, Ontology.ops().entities().SOURCE):
        return [Ontology.ops().source_types().MOUSE]
    return [
        Ontology.ops().specimen_categories().BLOCK,
        Ontology.ops().specimen_categories().SECTION,
        Ontology.ops().specimen_categories().SUSPENSION,
    ]

def validate_records_uuids(records, entity_type, sub_type, pathname):
errors = []
Expand Down

0 comments on commit 54971e4

Please sign in to comment.