hotosm · nrjadkry · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -17,6 +17,7 @@ Right now, we are in the process of building the prototype. We warmly welcome yo
 Create pull requests (PRs) for changes that you think are needed. We would really appreciate your help!
 
 Skills with the following would be beneficial:
+
 - Python
 - FastAPI
 - Javascript

diff --git a/src/backend/app/central/central_crud.py b/src/backend/app/central/central_crud.py
@@ -367,6 +367,17 @@ def download_submissions(
     return fixed.splitlines()
 
 
+def download_submissions_media(
+    project_id: int,
+    xform_id: str,
+    odk_central: project_schemas.ODKCentral = None,
+):
+    """Download submissions from a remote ODK server."""
+    xform = get_odk_form(odk_central)
+    data = xform.getSubmissionMedia(project_id, xform_id)
+    return data
+
+
 async def test_form_validity(xform_content: str, form_type: str):
     """Validate an XForm.
     Parameters:

diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py
@@ -16,17 +16,19 @@
 #     along with FMTM.  If not, see <https:#www.gnu.org/licenses/>.
 #
 import base64
+import csv
 import io
 import json
 import os
 import time
 import uuid
 import zipfile
 from base64 import b64encode
-from io import BytesIO
+from io import BytesIO, StringIO
 from json import dumps, loads
 from typing import List
 from zipfile import ZipFile
+from zipfile import ZipFile as zf
 
 import geoalchemy2
 import geojson
@@ -2659,3 +2661,115 @@ def generate_appuser_files_for_janakpur(
         update_background_task_status_in_database(
             db, background_task_id, 4
         )  # 4 is COMPLETED
+
+
+def expand_geopoints(csv, geopoint_column_name):
+    """Accepts a list representing a set of CSV ODK submissions and expands
+    a geopoint column to include lon, lat, ele, acc columns for easy
+    import into QGIS or direct conversion to GeoJSON or similar.
+    """
+    newcsv = []
+    try:
+        header_row = csv[0]
+        column_num = header_row.index(geopoint_column_name)
+        print(f"I found {geopoint_column_name} at index {column_num}")
+        newheaderrow = header_row[: column_num + 1]
+        newheaderrow.extend(["lat", "lon", "ele", "acc"])
+        newheaderrow.extend(header_row[column_num + 1 :])
+        newcsv.append(newheaderrow)
+        for row in csv[1:]:
+            split_geopoint = row[column_num].split()
+            print(split_geopoint)
+            if len(split_geopoint) == 4:
+                newrow = row[: column_num + 1]
+                newrow.extend(split_geopoint)
+                newrow.extend(row[column_num + 1 :])
+            newcsv.append(newrow)
+
+    except Exception as e:
+        print("Is that the right geopoint column name?")
+        print(e)
+
+    return newcsv
+
+
+def project_submissions_unzipped(
+    pid, formsl, outdir, collate, expand_geopoint, odk_central_credentials
+):
+    """Downloads and unzips all of the submissions from a given ODK project."""
+    if collate:
+        collated_outfilepath = os.path.join(
+            outdir, f"project_{pid}_submissions" "_collated.csv"
+        )
+        c_outfile = open(collated_outfilepath, "w")
+        cw = csv.writer(c_outfile)
+
+        # create a single file to dump all repeat data lines
+        # TODO multiple collated files for multiple repeats
+        c_repeatfilepath = os.path.join(
+            outdir, f"project_{pid}_repeats" "_collated.csv"
+        )
+        c_repeatfile = open(c_repeatfilepath, "w")
+        cr = csv.writer(c_repeatfile)
+
+    for fidx, form in enumerate(formsl):
+        form_id = form["xmlFormId"]
+        print(f"Checking submissions from {form_id}.")
+        # subs_zip = csv_submissions(url, aut, pid, form_id)
+
+        subs_zip = central_crud.download_submissions_media(
+            pid, form_id, odk_central_credentials
+        )
+
+        subs_bytes = BytesIO(subs_zip.content)
+        subs_bytes.seek(0)
+        subs_unzipped = zf(subs_bytes)
+        sub_namelist = subs_unzipped.namelist()
+
+        subcount = len(sub_namelist)
+        print(f"There are {subcount} files in submissions from {form_id}:")
+        print(sub_namelist)
+
+        # Now save the rest of the files
+        for idx, sub_name in enumerate(sub_namelist):
+            subs_bytes = subs_unzipped.read(sub_name)
+            outfilename = os.path.join(outdir, sub_name)
+
+            # Some attachments need a subdirectory
+            suboutdir = os.path.split(outfilename)[0]
+            if not os.path.exists(suboutdir):
+                os.makedirs(suboutdir)
+
+            # If it is a csv, open it and see if it is more than one line
+            # This might go wrong if something is encoded in other than UTF-8
+
+            if os.path.splitext(sub_name)[1] == ".csv":
+                subs_stringio = StringIO(subs_bytes.decode())
+                subs_list = list(csv.reader(subs_stringio))
+                # Check if there are CSV lines after the headers
+                subs_len = len(subs_list)
+                print(f"{sub_name} has {subs_len - 1} submissions")
+                if subs_len > 1:
+                    subs_to_write = subs_list
+                    if expand_geopoint:
+                        subs_to_write = expand_geopoints(subs_list, expand_geopoint)
+                    with open(outfilename, "w") as outfile:
+                        w = csv.writer(outfile)
+                        w.writerows(subs_to_write)
+                    if collate:
+                        if not idx:
+                            if not fidx:
+                                # First form. Include header
+                                cw.writerows(subs_to_write)
+                            else:
+                                # Not first form. Skip first row (header)
+                                cw.writerows(subs_to_write[1:])
+                        else:
+                            # Include header because it's a repeat
+                            # TODO actually create a separate collated
+                            # CSV output for each repeat in the survey
+                            cr.writerows(subs_to_write)
+
+            else:
+                with open(outfilename, "wb") as outfile:
+                    outfile.write(subs_bytes)
diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py
@@ -17,6 +17,7 @@
 #
 import json
 import os
+import shutil
 import uuid
 from typing import List, Optional
 
@@ -1153,3 +1154,45 @@ async def generate_files_janakpur(
     )
 
     return {"Message": f"{project_id}", "task_id": f"{background_task_id}"}
+
+
+@router.get("/{project_id}/download_csv/")
+def download_forms(
+    project_id: int, db: Session = Depends(database.get_db)
+) -> FileResponse:
+    """Download the submissions for a given project in CSV format.
+
+    Parameters:
+        - project_id (int): The ID of the project.
+
+    Returns:
+        - FileResponse: The response object containing the downloaded CSV file.
+    """
+    project = project_crud.get_project(db, project_id)
+    odkid = project.odkid
+
+    odk_credentials = project_schemas.ODKCentral(
+        odk_central_url=project.odk_central_url,
+        odk_central_user=project.odk_central_user,
+        odk_central_password=project.odk_central_password,
+    )
+
+    forms = central_crud.list_odk_xforms(odkid, odk_credentials)
+
+    output_dir = f"/tmp/{project_id}_submissions/"
+
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    project_crud.project_submissions_unzipped(
+        odkid, forms, output_dir, True, False, odk_credentials
+    )
+
+    temp_zip_file = "/tmp/submissions"
+    shutil.make_archive(temp_zip_file, "zip", output_dir)
+
+    headers = {
+        "Content-Disposition": f'attachment; filename="{os.path.basename(temp_zip_file)}.zip"'
+    }
+    return FileResponse(f"{temp_zip_file}.zip", headers=headers)
-Original file line number
+Diff line change
@@ Expand Up @@
     Create pull requests (PRs) for changes that you think are needed. We would really appreciate your help!
     Skills with the following would be beneficial:
     - Python
     - FastAPI
     - Javascript
@@ Expand Down @@