From 8e22a5cc4ee6a3fc110d892065377dc2af6c3563 Mon Sep 17 00:00:00 2001
From: Pedro Assis
Date: Fri, 25 Jun 2021 13:49:39 -1000
Subject: [PATCH] redirecting region search to data services

---
 src/encoded/genomic_data_service.py | 29 +++++++++++++++++++++++++++++
 src/encoded/region_search.py        | 27 ++++++++++++---------------
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/src/encoded/genomic_data_service.py b/src/encoded/genomic_data_service.py
index 4f8c39b50b7..1f1b7bd25eb 100644
--- a/src/encoded/genomic_data_service.py
+++ b/src/encoded/genomic_data_service.py
@@ -14,6 +14,7 @@
 ]
 RNA_GET_EXPRESSIONS = '/expressions/bytes'
 RNA_GET_AUTOCOMPLETE = '/autocomplete'
+REGION_SEARCH = '/region-search'
 
 # react component orders columns by "the position" in the hash map
 RNA_GET_COLUMNS = {
@@ -262,3 +263,31 @@ def rna_get(self):
         }
 
         return response
+
+
+    def region_search(self, assembly, chromosome, start, end):
+        """Query the data service's region-search endpoint for an interval.
+
+        Sends ``assembly``, ``chr`` (``chromosome`` with 'chr' removed),
+        ``start`` and ``end`` as query parameters and returns the decoded
+        JSON body. Raises ``requests.RequestException`` on timeouts or
+        HTTP error statuses, and ``ValueError`` if the body is not JSON.
+        """
+        params = {
+            'assembly': assembly,
+            'chr': chromosome.replace('chr', ''),
+            'start': start,
+            'end': end,
+        }
+
+        # Let requests build the query string so values are URL-encoded.
+        response = requests.get(
+            f'{self.path}{REGION_SEARCH}',
+            params=params,
+            timeout=3,
+        )
+        # Fail loudly on 4xx/5xx instead of returning an error payload that
+        # callers would index as if it were a result set.
+        response.raise_for_status()
+
+        return response.json()
diff --git a/src/encoded/region_search.py b/src/encoded/region_search.py
index 766fbd5a4c0..aabe3883174 100644
--- a/src/encoded/region_search.py
+++ b/src/encoded/region_search.py
@@ -8,6 +8,8 @@
 import requests
 from urllib.parse import urlencode
 
+from encoded.genomic_data_service import GenomicDataService
+
 import logging
 import re
 
@@ -253,6 +255,9 @@ def region_search(context, request):
     """
     Search files by region.
     """
+
+    data_service = GenomicDataService(context.registry, request)
+
     types = request.registry[TYPES]
     result = {
         '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0] if request.query_string else ''),
@@ -314,25 +319,17 @@ def region_search(context, request):
             chr=chromosome, start=start, end=end
         )
 
-    # Search for peaks for the coordinates we got
     try:
-        # including inner hits is very slow
-        # figure out how to distinguish browser requests from .embed method requests
-        if 'peak_metadata' in request.query_string:
-            peak_query = get_peak_query(start, end, with_inner_hits=True, within_peaks=region_inside_peak_status)
-        else:
-            peak_query = get_peak_query(start, end, within_peaks=region_inside_peak_status)
-        peak_results = snp_es.search(body=peak_query,
-                                     index=chromosome.lower(),
-                                     doc_type=_GENOME_TO_ALIAS[assembly],
-                                     size=99999)
+        peak_results = data_service.region_search(_GENOME_TO_ALIAS[assembly], chromosome.lower(), start, end)
     except Exception:
         result['notification'] = 'Error during search'
         return result
+
     file_uuids = []
-    for hit in peak_results['hits']['hits']:
-        if hit['_id'] not in file_uuids:
-            file_uuids.append(hit['_id'])
+    for hit in peak_results['regions']:
+        region_uuid = hit['file_url'].split('/')[-1]
+        if region_uuid not in file_uuids:
+            file_uuids.append(region_uuid)
     file_uuids = list(set(file_uuids))
     result['notification'] = 'No results found'
 
@@ -363,7 +360,7 @@ def region_search(context, request):
         result['@graph'] = list(format_results(request, es_results['hits']['hits']))
         result['total'] = total = es_results['hits']['total']
         result['facets'] = format_facets(es_results, _FACETS, used_filters, schemas, total, principals)
-        result['peaks'] = list(peak_results['hits']['hits'])
+        result['peaks'] = list(peak_results['regions'])
         result['download_elements'] = get_peak_metadata_links(request)
         if result['total'] > 0:
             result['notification'] = 'Success'