Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download submissions in a single CSV file #909

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Right now, we are in the process of building the prototype. We warmly welcome yo
Create pull requests (PRs) for changes that you think are needed. We would really appreciate your help!

Skills with the following would be beneficial:

- Python
- FastAPI
- Javascript
Expand Down
11 changes: 11 additions & 0 deletions src/backend/app/central/central_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,17 @@ def download_submissions(
return fixed.splitlines()


def download_submissions_media(
    project_id: int,
    xform_id: str,
    odk_central: project_schemas.ODKCentral = None,
):
    """Fetch the submission media for one form from a remote ODK server.

    Args:
        project_id: ODK project identifier passed to the form client.
        xform_id: Identifier of the XForm whose submission media is wanted.
        odk_central: Credentials handed to ``get_odk_form`` to build the
            client. NOTE(review): what ``None`` means is decided by
            ``get_odk_form``; confirm against that helper.

    Returns:
        Whatever ``getSubmissionMedia`` returns, unmodified.
        NOTE(review): callers read ``.content`` from it and open it as a
        zip archive, so it is presumably an HTTP response object.
    """
    return get_odk_form(odk_central).getSubmissionMedia(project_id, xform_id)


async def test_form_validity(xform_content: str, form_type: str):
"""Validate an XForm.
Parameters:
Expand Down
116 changes: 115 additions & 1 deletion src/backend/app/projects/project_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@
# along with FMTM.  If not, see <https://www.gnu.org/licenses/>.
#
import base64
import csv
import io
import json
import os
import time
import uuid
import zipfile
from base64 import b64encode
from io import BytesIO
from io import BytesIO, StringIO
from json import dumps, loads
from typing import List
from zipfile import ZipFile
from zipfile import ZipFile as zf

import geoalchemy2
import geojson
Expand Down Expand Up @@ -2659,3 +2661,115 @@ def generate_appuser_files_for_janakpur(
update_background_task_status_in_database(
db, background_task_id, 4
) # 4 is COMPLETED


def expand_geopoints(csv, geopoint_column_name):
    """Expand a geopoint column of CSV submissions into separate columns.

    Accepts a list of rows (the first row being the header) representing a
    set of CSV ODK submissions and inserts ``lat``, ``lon``, ``ele`` and
    ``acc`` columns directly after the geopoint column, for easy import
    into QGIS or direct conversion to GeoJSON or similar.

    Args:
        csv: List of rows, each a list of strings; ``csv[0]`` is the header.
            (The name shadows the ``csv`` module inside this function; it is
            kept because it is part of the existing signature.)
        geopoint_column_name: Header name of the column holding the
            whitespace-separated geopoint value.

    Returns:
        A new list of rows with the four extra columns. Every input row is
        kept: rows whose geopoint does not split into exactly four parts
        (for example an empty value) get four blank cells instead of being
        dropped. Returns an empty list when ``csv`` is empty or the header
        does not contain ``geopoint_column_name``. The input is not mutated.
    """
    expanded_columns = ["lat", "lon", "ele", "acc"]

    # Only the header lookup can legitimately fail; keep the try body small
    # so unrelated errors are not silently swallowed.
    try:
        header_row = csv[0]
        column_num = header_row.index(geopoint_column_name)
    except (IndexError, ValueError) as e:
        print("Is that the right geopoint column name?")
        print(e)
        return []

    split_at = column_num + 1
    blank_cells = [""] * len(expanded_columns)

    newcsv = [
        list(header_row[:split_at]) + expanded_columns + list(header_row[split_at:])
    ]
    for row in csv[1:]:
        # Pad short rows so the new cells always line up with the header.
        padded = list(row) + [""] * (split_at - len(row))
        parts = padded[column_num].split()
        if len(parts) != len(expanded_columns):
            # Missing or malformed geopoint: keep the submission, leave the
            # expanded cells blank rather than losing the whole row.
            parts = blank_cells
        newcsv.append(padded[:split_at] + parts + padded[split_at:])

    return newcsv


def project_submissions_unzipped(
pid, formsl, outdir, collate, expand_geopoint, odk_central_credentials
):
"""Downloads and unzips all of the submissions from a given ODK project."""
if collate:
collated_outfilepath = os.path.join(
outdir, f"project_{pid}_submissions" "_collated.csv"
)
c_outfile = open(collated_outfilepath, "w")
cw = csv.writer(c_outfile)

# create a single file to dump all repeat data lines
# TODO multiple collated files for multiple repeats
c_repeatfilepath = os.path.join(
outdir, f"project_{pid}_repeats" "_collated.csv"
)
c_repeatfile = open(c_repeatfilepath, "w")
cr = csv.writer(c_repeatfile)

for fidx, form in enumerate(formsl):
form_id = form["xmlFormId"]
print(f"Checking submissions from {form_id}.")
# subs_zip = csv_submissions(url, aut, pid, form_id)

subs_zip = central_crud.download_submissions_media(
pid, form_id, odk_central_credentials
)

subs_bytes = BytesIO(subs_zip.content)
subs_bytes.seek(0)
subs_unzipped = zf(subs_bytes)
sub_namelist = subs_unzipped.namelist()

subcount = len(sub_namelist)
print(f"There are {subcount} files in submissions from {form_id}:")
print(sub_namelist)

# Now save the rest of the files
for idx, sub_name in enumerate(sub_namelist):
subs_bytes = subs_unzipped.read(sub_name)
outfilename = os.path.join(outdir, sub_name)

# Some attachments need a subdirectory
suboutdir = os.path.split(outfilename)[0]
if not os.path.exists(suboutdir):
os.makedirs(suboutdir)

# If it is a csv, open it and see if it is more than one line
# This might go wrong if something is encoded in other than UTF-8

if os.path.splitext(sub_name)[1] == ".csv":
subs_stringio = StringIO(subs_bytes.decode())
subs_list = list(csv.reader(subs_stringio))
# Check if there are CSV lines after the headers
subs_len = len(subs_list)
print(f"{sub_name} has {subs_len - 1} submissions")
if subs_len > 1:
subs_to_write = subs_list
if expand_geopoint:
subs_to_write = expand_geopoints(subs_list, expand_geopoint)
with open(outfilename, "w") as outfile:
w = csv.writer(outfile)
w.writerows(subs_to_write)
if collate:
if not idx:
if not fidx:
# First form. Include header
cw.writerows(subs_to_write)
else:
# Not first form. Skip first row (header)
cw.writerows(subs_to_write[1:])
else:
# Include header because it's a repeat
# TODO actually create a separate collated
# CSV output for each repeat in the survey
cr.writerows(subs_to_write)

else:
with open(outfilename, "wb") as outfile:
outfile.write(subs_bytes)
43 changes: 43 additions & 0 deletions src/backend/app/projects/project_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
import json
import os
import shutil
import uuid
from typing import List, Optional

Expand Down Expand Up @@ -1153,3 +1154,45 @@ async def generate_files_janakpur(
)

return {"Message": f"{project_id}", "task_id": f"{background_task_id}"}


@router.get("/{project_id}/download_csv/")
def download_forms(
    project_id: int, db: Session = Depends(database.get_db)
) -> FileResponse:
    """Download the submissions for a given project in CSV format.

    All forms of the project's ODK project are downloaded, unzipped and
    collated into a scratch directory, which is then zipped and returned.

    Parameters:
    - project_id (int): The ID of the project.
    - db (Session): Database session injected by FastAPI.

    Returns:
    - FileResponse: A zip archive (offered to the client as
      ``submissions.zip``) containing the per-form files and collated CSVs.

    Raises:
    - HTTPException (404): If no project with ``project_id`` is found.
    """
    # fastapi already provides Depends/FileResponse for this module; imported
    # locally so this block does not rely on the file-level import list.
    from fastapi import HTTPException

    project = project_crud.get_project(db, project_id)
    # NOTE(review): assumes get_project returns None for an unknown id; if it
    # raises instead, this guard is simply never hit.
    if project is None:
        raise HTTPException(
            status_code=404, detail=f"Project {project_id} not found"
        )
    odkid = project.odkid

    odk_credentials = project_schemas.ODKCentral(
        odk_central_url=project.odk_central_url,
        odk_central_user=project.odk_central_user,
        odk_central_password=project.odk_central_password,
    )

    forms = central_crud.list_odk_xforms(odkid, odk_credentials)

    output_dir = f"/tmp/{project_id}_submissions/"
    # Per-project archive path: a single shared "/tmp/submissions.zip" let
    # downloads for different projects overwrite each other.
    archive_base = f"/tmp/{project_id}_submissions"

    # Start from an empty directory so stale files never leak into the zip.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    try:
        project_crud.project_submissions_unzipped(
            odkid, forms, output_dir, True, False, odk_credentials
        )
        shutil.make_archive(archive_base, "zip", output_dir)
    finally:
        # The extracted files are only an intermediate step; do not leave
        # them behind, whether archiving succeeded or not.
        shutil.rmtree(output_dir, ignore_errors=True)

    # Keep the client-visible file name unchanged.
    headers = {"Content-Disposition": 'attachment; filename="submissions.zip"'}
    return FileResponse(f"{archive_base}.zip", headers=headers)
Loading