From 08d5f39d5548167cb62147e38fb2ca24af8f1c21 Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Thu, 12 Oct 2023 15:07:50 +0545 Subject: [PATCH 1/7] fix: feature count in task features count api --- src/backend/app/tasks/tasks_routes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/app/tasks/tasks_routes.py b/src/backend/app/tasks/tasks_routes.py index 92386da921..1758a153c1 100644 --- a/src/backend/app/tasks/tasks_routes.py +++ b/src/backend/app/tasks/tasks_routes.py @@ -172,9 +172,9 @@ async def task_features_count( # Assemble the final data list data = [] for x in odk_details: - feature_count_query = f""" + feature_count_query = text(f""" select count(*) from features where project_id = {project_id} and task_id = {x['xmlFormId']} - """ + """) result = db.execute(feature_count_query) feature_count = result.fetchone() @@ -184,7 +184,7 @@ async def task_features_count( "task_id": x["xmlFormId"], "submission_count": x["submissions"], "last_submission": x["lastSubmission"], - "feature_count": feature_count["count"], + "feature_count": feature_count[0], } ) From ed855662db8794666d71548eae90501fe169d34b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 09:26:35 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/backend/app/tasks/tasks_routes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/app/tasks/tasks_routes.py b/src/backend/app/tasks/tasks_routes.py index 1758a153c1..e45883b68a 100644 --- a/src/backend/app/tasks/tasks_routes.py +++ b/src/backend/app/tasks/tasks_routes.py @@ -172,9 +172,11 @@ async def task_features_count( # Assemble the final data list data = [] for x in odk_details: - feature_count_query = text(f""" + feature_count_query = text( + f""" select count(*) from features where project_id = {project_id} and task_id = {x['xmlFormId']} - """) + """ + ) result = db.execute(feature_count_query) feature_count = result.fetchone() From 9efea73edc5a43d7b90f482ecb12e456b6527a9f Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Thu, 12 Oct 2023 15:58:54 +0545 Subject: [PATCH 3/7] task_count passed in the upload multipolygon api --- src/backend/app/projects/project_routes.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index ce7d2d8b82..093927af1d 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -378,7 +378,13 @@ async def upload_multi_project_boundary( status_code=428, detail=f"Project with id {project_id} does not exist" ) - return {"message": "Project Boundary Uploaded", "project_id": f"{project_id}"} + # Get the number of tasks in a project + task_count = await tasks_crud.get_task_count_in_project(db, project_id) + + return {"message": "Project Boundary Uploaded", + "project_id": f"{project_id}", + "task_count": task_count + } @router.post("/task_split") From 27e012e11edfe8d906df8a0a9362996fe9b694fe Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Fri, 13 Oct 2023 11:42:14 +0545 Subject: [PATCH 4/7] function to download submissions media from odk central --- src/backend/app/central/central_crud.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/backend/app/central/central_crud.py b/src/backend/app/central/central_crud.py index 46ff85d83d..702d1254dc 100644 --- a/src/backend/app/central/central_crud.py +++ b/src/backend/app/central/central_crud.py @@ -367,6 +367,17 @@ def download_submissions( return fixed.splitlines() +def download_submissions_media( + project_id: int, + xform_id: str, + odk_central: project_schemas.ODKCentral = None, +): + """Download submissions from a remote ODK server.""" + xform = get_odk_form(odk_central) + data = xform.getSubmissionMedia(project_id, xform_id) + return data + + async def test_form_validity(xform_content: str, form_type: str): """Validate an XForm. Parameters: From 6a607212ef27b4333a18d2c656fd8b21365358c5 Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Fri, 13 Oct 2023 11:43:05 +0545 Subject: [PATCH 5/7] Scripts to combine downloaded csv submissions of different forms --- src/backend/app/projects/project_crud.py | 112 +++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 693eadea73..5c3b60aa02 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -15,6 +15,8 @@ # You should have received a copy of the GNU General Public License # along with FMTM. If not, see . # +import os +import csv import base64 import io import json @@ -68,6 +70,9 @@ from ..tasks import tasks_crud from ..users import user_crud from . import project_schemas +from zipfile import ZipFile as zf +from io import BytesIO, StringIO + QR_CODES_DIR = "QR_codes/" TASK_GEOJSON_DIR = "geojson/" @@ -2657,3 +2662,110 @@ def generate_appuser_files_for_janakpur( update_background_task_status_in_database( db, background_task_id, 4 ) # 4 is COMPLETED + +def expand_geopoints(csv, geopoint_column_name): + """Accepts a list representing a set of CSV ODK submissions and expands + a geopoint column to include lon, lat, ele, acc columns for easy + import into QGIS or direct conversion to GeoJSON or similar. + """ + newcsv = [] + try: + header_row = csv[0] + column_num = header_row.index(geopoint_column_name) + print(f"I found {geopoint_column_name} at index {column_num}") + newheaderrow = header_row[: column_num + 1] + newheaderrow.extend(["lat", "lon", "ele", "acc"]) + newheaderrow.extend(header_row[column_num + 1 :]) + newcsv.append(newheaderrow) + for row in csv[1:]: + split_geopoint = row[column_num].split() + print(split_geopoint) + if len(split_geopoint) == 4: + newrow = row[: column_num + 1] + newrow.extend(split_geopoint) + newrow.extend(row[column_num + 1 :]) + newcsv.append(newrow) + + except Exception as e: + print("Is that the right geopoint column name?") + print(e) + + return newcsv + +def project_submissions_unzipped( + pid, formsl, outdir, collate, expand_geopoint, odk_central_credentials +): + """Downloads and unzips all of the submissions from a given ODK project.""" + if collate: + + collated_outfilepath = os.path.join(outdir, f'project_{pid}_submissions' + '_collated.csv') + c_outfile = open(collated_outfilepath, 'w') + cw = csv.writer(c_outfile) + + # create a single file to dump all repeat data lines + # TODO multiple collated files for multiple repeats + c_repeatfilepath = os.path.join(outdir, f'project_{pid}_repeats' + '_collated.csv') + c_repeatfile = open(c_repeatfilepath, 'w') + cr = csv.writer(c_repeatfile) + + for fidx, form in enumerate(formsl): + form_id = form['xmlFormId'] + print(f'Checking submissions from {form_id}.') + # subs_zip = csv_submissions(url, aut, pid, form_id) + + subs_zip = central_crud.download_submissions_media(pid, form_id, odk_central_credentials) + + subs_bytes = BytesIO(subs_zip.content) + subs_bytes.seek(0) + subs_unzipped = zf(subs_bytes) + sub_namelist = subs_unzipped.namelist() + + subcount = len(sub_namelist) + print(f'There are {subcount} files in submissions from {form_id}:') + print(sub_namelist) + + # Now save the rest of the files + for idx, sub_name in enumerate(sub_namelist): + subs_bytes = subs_unzipped.read(sub_name) + outfilename = os.path.join(outdir, sub_name) + + # Some attachments need a subdirectory + suboutdir = os.path.split(outfilename)[0] + if not os.path.exists(suboutdir): + os.makedirs(suboutdir) + + # If it is a csv, open it and see if it is more than one line + # This might go wrong if something is encoded in other than UTF-8 + + if os.path.splitext(sub_name)[1] == '.csv': + subs_stringio = StringIO(subs_bytes.decode()) + subs_list = list(csv.reader(subs_stringio)) + # Check if there are CSV lines after the headers + subs_len = len(subs_list) + print(f'{sub_name} has {subs_len - 1} submissions') + if subs_len > 1: + subs_to_write = subs_list + if expand_geopoint: + subs_to_write = expand_geopoints(subs_list, expand_geopoint) + with open(outfilename, "w") as outfile: + w = csv.writer(outfile) + w.writerows(subs_to_write) + if collate: + if not idx: + if not fidx: + # First form. Include header + cw.writerows(subs_to_write) + else: + # Not first form. Skip first row (header) + cw.writerows(subs_to_write[1:]) + else: + # Include header because it's a repeat + # TODO actually create a separate collated + # CSV output for each repeat in the survey + cr.writerows(subs_to_write) + + else: + with open(outfilename, "wb") as outfile: + outfile.write(subs_bytes) From 2ebe631406d0018d3f3a737932e7d983cdeecb9d Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Fri, 13 Oct 2023 11:44:07 +0545 Subject: [PATCH 6/7] api to download submissions in a single csv combined for the whole project --- src/backend/app/projects/project_routes.py | 46 +++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index 093927af1d..899598466b 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -18,6 +18,7 @@ import json import os import uuid +import shutil from typing import List, Optional from fastapi import ( @@ -37,7 +38,7 @@ from osm_fieldwork.xlsforms import xlsforms_path from sqlalchemy.orm import Session -from ..central import central_crud +from ..central import central_crud, central_schemas from ..db import database, db_models from ..models.enums import TILES_SOURCE from ..tasks import tasks_crud @@ -1152,3 +1153,46 @@ async def generate_files_janakpur( ) return {"Message": f"{project_id}", "task_id": f"{background_task_id}"} + + +@router.get("/{project_id}/download_csv/") +def download_forms( + project_id: int, + db: Session = Depends(database.get_db) +) -> FileResponse: + """ + Download the submissions for a given project in CSV format. + + Parameters: + - project_id (int): The ID of the project. + + Returns: + - FileResponse: The response object containing the downloaded CSV file. + """ + + project = project_crud.get_project(db, project_id) + odkid = project.odkid + + odk_credentials = project_schemas.ODKCentral( + odk_central_url=project.odk_central_url, + odk_central_user=project.odk_central_user, + odk_central_password=project.odk_central_password, + ) + + forms = central_crud.list_odk_xforms(odkid, odk_credentials) + + output_dir = f"/tmp/{project_id}_submissions/" + + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir) + + project_crud.project_submissions_unzipped(odkid, forms, output_dir, True, False, odk_credentials) + + temp_zip_file = "/tmp/submissions" + shutil.make_archive(temp_zip_file, 'zip', output_dir) + + headers = { + "Content-Disposition": f'attachment; filename="{os.path.basename(temp_zip_file)}.zip"' + } + return FileResponse(f"{temp_zip_file}.zip", headers=headers) From 498dae63782f831f29da0d5d3aa33cff5fdcbfe3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Oct 2023 06:13:36 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CONTRIBUTING.md | 1 + src/backend/app/projects/project_crud.py | 44 +++++++++++----------- src/backend/app/projects/project_routes.py | 26 ++++++------- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2eb27e340d..322f93b478 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,6 +17,7 @@ Right now, we are in the process of building the prototype. We warmly welcome yo Create pull requests (PRs) for changes that you think are needed. We would really appreciate your help! Skills with the following would be beneficial: + - Python - FastAPI - Javascript diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index fa139f2d76..20ccb2d0ca 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -15,9 +15,8 @@ # You should have received a copy of the GNU General Public License # along with FMTM. If not, see . # -import os -import csv import base64 +import csv import io import json import os @@ -25,10 +24,11 @@ import uuid import zipfile from base64 import b64encode -from io import BytesIO +from io import BytesIO, StringIO from json import dumps, loads from typing import List from zipfile import ZipFile +from zipfile import ZipFile as zf import geoalchemy2 import geojson @@ -70,9 +70,6 @@ from ..tasks import tasks_crud from ..users import user_crud from . import project_schemas -from zipfile import ZipFile as zf -from io import BytesIO, StringIO - QR_CODES_DIR = "QR_codes/" TASK_GEOJSON_DIR = "geojson/" @@ -2665,6 +2662,7 @@ def generate_appuser_files_for_janakpur( db, background_task_id, 4 ) # 4 is COMPLETED + def expand_geopoints(csv, geopoint_column_name): """Accepts a list representing a set of CSV ODK submissions and expands a geopoint column to include lon, lat, ele, acc columns for easy @@ -2694,30 +2692,34 @@ def expand_geopoints(csv, geopoint_column_name): return newcsv + def project_submissions_unzipped( pid, formsl, outdir, collate, expand_geopoint, odk_central_credentials ): """Downloads and unzips all of the submissions from a given ODK project.""" if collate: - - collated_outfilepath = os.path.join(outdir, f'project_{pid}_submissions' - '_collated.csv') - c_outfile = open(collated_outfilepath, 'w') + collated_outfilepath = os.path.join( + outdir, f"project_{pid}_submissions" "_collated.csv" + ) + c_outfile = open(collated_outfilepath, "w") cw = csv.writer(c_outfile) # create a single file to dump all repeat data lines # TODO multiple collated files for multiple repeats - c_repeatfilepath = os.path.join(outdir, f'project_{pid}_repeats' - '_collated.csv') - c_repeatfile = open(c_repeatfilepath, 'w') + c_repeatfilepath = os.path.join( + outdir, f"project_{pid}_repeats" "_collated.csv" + ) + c_repeatfile = open(c_repeatfilepath, "w") cr = csv.writer(c_repeatfile) for fidx, form in enumerate(formsl): - form_id = form['xmlFormId'] - print(f'Checking submissions from {form_id}.') + form_id = form["xmlFormId"] + print(f"Checking submissions from {form_id}.") # subs_zip = csv_submissions(url, aut, pid, form_id) - subs_zip = central_crud.download_submissions_media(pid, form_id, odk_central_credentials) + subs_zip = central_crud.download_submissions_media( + pid, form_id, odk_central_credentials + ) subs_bytes = BytesIO(subs_zip.content) subs_bytes.seek(0) @@ -2725,7 +2727,7 @@ def project_submissions_unzipped( sub_namelist = subs_unzipped.namelist() subcount = len(sub_namelist) - print(f'There are {subcount} files in submissions from {form_id}:') + print(f"There are {subcount} files in submissions from {form_id}:") print(sub_namelist) # Now save the rest of the files @@ -2741,12 +2743,12 @@ def project_submissions_unzipped( # If it is a csv, open it and see if it is more than one line # This might go wrong if something is encoded in other than UTF-8 - if os.path.splitext(sub_name)[1] == '.csv': + if os.path.splitext(sub_name)[1] == ".csv": subs_stringio = StringIO(subs_bytes.decode()) subs_list = list(csv.reader(subs_stringio)) # Check if there are CSV lines after the headers subs_len = len(subs_list) - print(f'{sub_name} has {subs_len - 1} submissions') + print(f"{sub_name} has {subs_len - 1} submissions") if subs_len > 1: subs_to_write = subs_list if expand_geopoint: @@ -2755,7 +2757,7 @@ def project_submissions_unzipped( w = csv.writer(outfile) w.writerows(subs_to_write) if collate: - if not idx: + if not idx: if not fidx: # First form. Include header cw.writerows(subs_to_write) @@ -2767,7 +2769,7 @@ def project_submissions_unzipped( # TODO actually create a separate collated # CSV output for each repeat in the survey cr.writerows(subs_to_write) - + else: with open(outfilename, "wb") as outfile: outfile.write(subs_bytes) diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index 899598466b..46de72c3c9 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -17,8 +17,8 @@ # import json import os -import uuid import shutil +import uuid from typing import List, Optional from fastapi import ( @@ -38,7 +38,7 @@ from osm_fieldwork.xlsforms import xlsforms_path from sqlalchemy.orm import Session -from ..central import central_crud, central_schemas +from ..central import central_crud from ..db import database, db_models from ..models.enums import TILES_SOURCE from ..tasks import tasks_crud @@ -382,10 +382,11 @@ async def upload_multi_project_boundary( # Get the number of tasks in a project task_count = await tasks_crud.get_task_count_in_project(db, project_id) - return {"message": "Project Boundary Uploaded", - "project_id": f"{project_id}", - "task_count": task_count - } + return { + "message": "Project Boundary Uploaded", + "project_id": f"{project_id}", + "task_count": task_count, + } @router.post("/task_split") @@ -1157,11 +1158,9 @@ async def generate_files_janakpur( @router.get("/{project_id}/download_csv/") def download_forms( - project_id: int, - db: Session = Depends(database.get_db) + project_id: int, db: Session = Depends(database.get_db) ) -> FileResponse: - """ - Download the submissions for a given project in CSV format. + """Download the submissions for a given project in CSV format. Parameters: - project_id (int): The ID of the project. @@ -1169,7 +1168,6 @@ def download_forms( Returns: - FileResponse: The response object containing the downloaded CSV file. """ - project = project_crud.get_project(db, project_id) odkid = project.odkid @@ -1187,10 +1185,12 @@ def download_forms( shutil.rmtree(output_dir) os.makedirs(output_dir) - project_crud.project_submissions_unzipped(odkid, forms, output_dir, True, False, odk_credentials) + project_crud.project_submissions_unzipped( + odkid, forms, output_dir, True, False, odk_credentials + ) temp_zip_file = "/tmp/submissions" - shutil.make_archive(temp_zip_file, 'zip', output_dir) + shutil.make_archive(temp_zip_file, "zip", output_dir) headers = { "Content-Disposition": f'attachment; filename="{os.path.basename(temp_zip_file)}.zip"'