Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download Endpoint #2

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
urlpatterns = [
path('<str:name>/_search/', views.index, name='index'),
path('<str:name>/<str:id>', views.detail, name='detail'),
path('<str:name>/download/', views.download, name='download'),
path('fire_api/trackhubregistry/<str:genome_id>/<str:folder>/<str:doc_id>',
views.trackhubregistry_with_dirs_fire_api,
name='trackhubregistry_with_dirs_fire_api'),
Expand Down
98 changes: 98 additions & 0 deletions api/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import json
import csv

from django.http import JsonResponse, HttpResponse
from elasticsearch import Elasticsearch
Expand All @@ -16,6 +17,7 @@
'protocol_analysis', 'analysis', 'summary_organism',
'summary_specimen', 'summary_dataset', 'summary_file']

# Names accepted by the download view; any other name is rejected before
# Elasticsearch is queried.
ALLOWED_DOWNLOADS = ['file', 'organism', 'specimen', 'dataset']

@csrf_exempt
def index(request, name):
Expand Down Expand Up @@ -92,6 +94,102 @@ def index(request, name):

return JsonResponse(data)

def _build_download_query(filters):
    """Turn the parsed ``filters`` mapping into an Elasticsearch request body.

    ``filters`` maps a field name to a list of accepted values. A list whose
    first value is the string ``'false'`` is translated into a ``must_not``
    clause on ``"true"``; every other non-empty list becomes a ``must``
    ``terms`` clause. Returns ``{}`` when no clause is produced.
    """
    must = []
    must_not = []
    for key, values in filters.items():
        if not values:
            # An empty selection constrains nothing; skip it instead of
            # failing on values[0].
            continue
        if values[0] == 'false':
            must_not.append({"terms": {key: ["true"]}})
        else:
            must.append({"terms": {key: values}})

    bool_query = {}
    if must:
        bool_query['must'] = must
    if must_not:
        bool_query['must_not'] = must_not
    return {"query": {"bool": bool_query}} if bool_query else {}


def _extract_field(hit, path):
    """Follow a dotted ``path`` through the nested dict ``hit``.

    Returns the value found at the end of the path, or ``''`` as soon as a
    path component is missing or the current node is not a dict.
    """
    node = hit
    for part in path.split('.'):
        if not isinstance(node, dict) or part not in node:
            return ''
        node = node[part]
    return node


def _cell_text(value):
    """Render one value as a single-line string for the text table."""
    if value is None:
        return ''
    # Collapse line breaks so one record always occupies exactly one line.
    return ' '.join(str(value).splitlines())


def _format_text_table(rows):
    """Format equally sized ``rows`` as a fixed-width, ' | '-separated table.

    Every column is padded to the width of its longest cell (measured in
    characters). Lines are joined with '\\n' without a trailing newline.
    """
    cells = [[_cell_text(value) for value in row] for row in rows]
    # Column widths are computed once, not once per cell.
    widths = [max(len(cell) for cell in column) for column in zip(*cells)]
    return '\n'.join(
        ' | '.join(cell.ljust(width) for cell, width in zip(row, widths))
        for row in cells
    )


@csrf_exempt
def download(request, name):
    """Export the Elasticsearch records of index ``name`` as a file.

    Query parameters read from ``request.GET``:
        file_format: ``'csv'`` for a CSV attachment; anything else yields a
            fixed-width text table.
        _source: comma-separated dotted paths into each hit (for example
            ``_source.a.b``); one output column per path.
        columns: JSON list of header labels, positionally matching
            ``_source``. Missing labels fall back to the path itself.
        sort: passed through to Elasticsearch unchanged.
        filters: JSON object mapping field names to lists of values.

    Returns an ``HttpResponse``: a plain message for a non-GET request or an
    unknown ``name``, a 400 response for malformed ``columns``/``filters``,
    otherwise the generated attachment.
    """
    if request.method != 'GET':
        return HttpResponse("This method is not allowed!\n")
    if name not in ALLOWED_DOWNLOADS:
        return HttpResponse("This download doesn't exist!\n")

    # Request params
    # NOTE(review): Elasticsearch normally rejects result windows larger than
    # index.max_result_window (10000 by default) - confirm the target indices
    # raise that limit, or switch to the scroll API.
    max_records = 1000000
    file_format = request.GET.get('file_format', '')
    field = request.GET.get('_source', '')
    sort = request.GET.get('sort', '')

    # Both JSON parameters come straight from the client, so reject malformed
    # input explicitly rather than letting it surface as a server error.
    try:
        column_names = json.loads(request.GET.get('columns', '[]'))
        filters = json.loads(request.GET.get('filters', '{}'))
    except ValueError:
        return HttpResponse("Invalid 'columns' or 'filters' parameter!\n",
                            status=400)
    if (not isinstance(column_names, list)
            or not isinstance(filters, dict)
            or not all(isinstance(v, list) for v in filters.values())):
        return HttpResponse("Invalid 'columns' or 'filters' parameter!\n",
                            status=400)

    columns = field.split(',')
    # Elasticsearch expects field names relative to the document source, so
    # only '_source.'-prefixed paths are requested, with the prefix removed.
    request_fields = ','.join(
        col.split('.', 1)[1] if '.' in col else ''
        for col in columns
        if col.split('.')[0] == '_source'
    )

    # Get records from elasticsearch
    es = Elasticsearch([settings.NODE1, settings.NODE2])
    data = es.search(index=name, _source=request_fields, sort=sort,
                     body=_build_download_query(filters), size=max_records)
    records = data['hits']['hits']

    # Header row: use the supplied label when there is one, otherwise fall
    # back to the raw path so a short or missing 'columns' list cannot fail.
    header = [
        column_names[i] if i < len(column_names) else col
        for i, col in enumerate(columns)
    ]
    rows = [header]
    rows.extend([_extract_field(hit, col) for col in columns]
                for hit in records)

    # generate response payload
    if file_format == 'csv':
        filename = 'faang_data.csv'
        content_type = 'text/csv'
    else:
        filename = 'faang_data.txt'
        content_type = 'text/plain'
    response = HttpResponse(content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename=' + filename

    if file_format == 'csv':
        csv.writer(response).writerows(rows)
    else:
        # Build the table from the row values directly; re-parsing CSV text
        # by splitting on ',' breaks on values that contain commas.
        response.content = _format_text_table(rows)
    return response

@csrf_exempt
def detail(request, name, id):
Expand Down