Skip to content

Commit

Permalink
feat(backend): endpoint to conflate the submission with osm data (#1594)
Browse files Browse the repository at this point in the history
* feat: endpoint to conflate the submission with osm data

* refactor: use postgresclient to get osm features using task geom

* fix: update wrap_check_access to bypass check for public project

* refactor: remove filename from extra params when requesting data extracts

* refactor: return DbUser and DbProject dict even if ProjectVisibility.PUBLIC

---------

Co-authored-by: spwoodcock <[email protected]>
  • Loading branch information
Sujanadh and spwoodcock authored Jul 11, 2024
1 parent b97b3bd commit 477f810
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 40 deletions.
24 changes: 18 additions & 6 deletions src/backend/app/auth/roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ async def wrap_check_access(
if not db_user:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="User is not a project manager",
detail="User do not have permission to access the project.",
)

return {
Expand Down Expand Up @@ -321,17 +321,29 @@ async def mapper(
project: DbProject = Depends(get_project_by_id),
db: Session = Depends(get_db),
user_data: AuthUser = Depends(login_required),
) -> AuthUser:
) -> ProjectUserDict:
"""A mapper for a specific project."""
# If project is public, skip permission check
if project.visibility == ProjectVisibility.PUBLIC:
return user_data
user_id = user_data.id
sql = text("SELECT * FROM users WHERE id = :user_id;")
result = db.execute(sql, {"user_id": user_id})
db_user = result.first()

if not db_user:
raise HTTPException(
status_code=HTTPStatus.NOT_FOUND,
detail=f"User ({user_id}) does not exist in database",
)

return {
"user": DbUser(**db_user._asdict()),
"project": project,
}

await wrap_check_access(
return await wrap_check_access(
project,
db,
user_data,
ProjectRole.MAPPER,
)

return user_data
15 changes: 8 additions & 7 deletions src/backend/app/central/central_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import os
import uuid
from io import BytesIO, StringIO
from typing import Optional
from typing import Optional, Union
from xml.etree.ElementTree import Element, SubElement

import geojson
Expand Down Expand Up @@ -624,8 +624,8 @@ def flatten_json(data: dict, target: dict):


async def convert_odk_submission_json_to_geojson(
input_json: BytesIO,
) -> BytesIO:
input_json: Union[BytesIO, list],
) -> geojson.FeatureCollection:
"""Convert ODK submission JSON file to GeoJSON.
Used for loading into QGIS.
Expand All @@ -636,7 +636,10 @@ async def convert_odk_submission_json_to_geojson(
Returns:
geojson (BytesIO): GeoJSON format ODK submission.
"""
submission_json = json.loads(input_json.getvalue())
if isinstance(input_json, list):
submission_json = input_json
else:
submission_json = json.loads(input_json.getvalue())

if not submission_json:
raise HTTPException(
Expand All @@ -660,9 +663,7 @@ async def convert_odk_submission_json_to_geojson(
feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)

featcol = geojson.FeatureCollection(features=all_features)

return BytesIO(json.dumps(featcol).encode("utf-8"))
return geojson.FeatureCollection(features=all_features)


async def get_entities_geojson(
Expand Down
122 changes: 122 additions & 0 deletions src/backend/app/db/postgis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import logging
from asyncio import gather
from datetime import datetime, timezone
from io import BytesIO
from random import getrandbits
from typing import Optional, Union

Expand All @@ -32,15 +33,20 @@
from geoalchemy2.shape import from_shape, to_shape
from geojson_pydantic import Feature, MultiPolygon, Polygon
from geojson_pydantic import FeatureCollection as FeatCol
from osm_fieldwork.data_models import data_models_path
from osm_rawdata.postgres import PostgresClient
from shapely.geometry import mapping, shape
from shapely.geometry.base import BaseGeometry
from shapely.ops import unary_union
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

from app.config import settings
from app.models.enums import XLSFormType

log = logging.getLogger(__name__)
API_URL = settings.RAW_DATA_API_URL


def timestamp():
Expand Down Expand Up @@ -785,3 +791,119 @@ def parse_featcol(features: Union[Feature, FeatCol, MultiPolygon, Polygon]):
elif isinstance(features, Feature):
feat_col = geojson.FeatureCollection([feat_col])
return feat_col


def get_osm_geometries(form_category, geometry):
    """Query OSM features for a geometry using a form category's data model.

    The extract config is read from the YAML file in ``data_models_path``
    whose name matches the ``XLSFormType`` member for ``form_category``.

    Args:
        form_category (str): Feature category type (eg: buildings).
        geometry (str): The geometry data in JSON format.

    Returns:
        dict: The response of ``PostgresClient.execQuery`` for the geometry,
            requested with ``outputType`` set to geojson.
    """
    yaml_path = f"{data_models_path}/{XLSFormType(form_category).name}.yaml"
    with open(yaml_path, "rb") as yaml_file:
        extract_config = BytesIO(yaml_file.read())

    # An empty / unset token is normalised to None
    token = settings.RAW_DATA_API_AUTH_TOKEN or None
    client = PostgresClient("underpass", extract_config, auth_token=token)

    query_params = {
        "outputType": "geojson",
        "bind_zip": True,
        "useStWithin": False,
    }
    return client.execQuery(geometry, extra_params=query_params)


def geometries_almost_equal(
    geom1: BaseGeometry, geom2: BaseGeometry, tolerance: float = 1e-6
) -> bool:
    """Check whether two geometries match within a tolerance.

    Delegates to ``geom1.equals_exact``.

    Args:
        geom1 (BaseGeometry): First geometry.
        geom2 (BaseGeometry): Second geometry.
        tolerance (float): Tolerance level for almost equality.

    Returns:
        bool: True if geometries are almost equal else False.
    """
    within_tolerance = geom1.equals_exact(geom2, tolerance)
    return within_tolerance


def check_partial_overlap(geom1: BaseGeometry, geom2: BaseGeometry) -> bool:
    """Check whether two geometries overlap only partially.

    The overlap counts as partial when the intersection has a non-zero area
    that is strictly smaller than the area of each geometry, so a geometry
    whose intersection equals its own area (e.g. one fully covered by the
    other) is not reported.

    Args:
        geom1 (BaseGeometry): First geometry.
        geom2 (BaseGeometry): Second geometry.

    Returns:
        bool: True if geometries have a partial overlap, else False.
    """
    shared = geom1.intersection(geom2)
    if shared.is_empty:
        return False

    shared_area = shared.area
    return 0 < shared_area < geom1.area and 0 < shared_area < geom2.area


def conflate_features(
    input_features: list, osm_features: list, remove_conflated=False, tolerance=1e-6
):
    """Conflate input features with OSM features to identify overlaps.

    Each input feature is compared against the OSM features in order, and the
    first match ends the comparison for that feature:

    - Geometry almost equal (within ``tolerance``): the OSM properties are
      merged into the input feature's properties.
    - Partial overlap: the overlapping OSM feature is added to the output.

    Every input feature gets ``is_duplicate`` and ``is_partial_overlap`` set
    in its properties. Input features are modified in place.

    Args:
        input_features (list): A list of input features with geometries.
        osm_features (list): A list of OSM features with geometries.
        remove_conflated (bool): Flag to remove conflated features.
        tolerance (float): Tolerance level for almost equality.

    Returns:
        list: A list of features after conflation with OSM features.
    """
    # Parse the OSM geometries once, rather than once per input feature
    osm_geometries = [shape(feature["geometry"]) for feature in osm_features]
    return_features = []

    for input_feature in input_features:
        input_geometry = shape(input_feature["geometry"])
        is_duplicate = False
        is_partial_overlap = False

        for osm_feature, osm_geometry in zip(
            osm_features, osm_geometries, strict=False
        ):
            if geometries_almost_equal(input_geometry, osm_geometry, tolerance):
                is_duplicate = True
                # GeoJSON allows null properties; update(None) would raise
                input_feature["properties"].update(osm_feature["properties"] or {})
                break

            if check_partial_overlap(input_geometry, osm_geometry):
                is_partial_overlap = True
                new_feature = {
                    "type": "Feature",
                    # mapping() reads __geo_interface__, which a plain GeoJSON
                    # dict does not have, so serialise the parsed shapely
                    # geometry instead of the raw feature geometry.
                    "geometry": mapping(osm_geometry),
                    "properties": osm_feature["properties"],
                }
                return_features.append(new_feature)
                break

        input_feature["properties"]["is_duplicate"] = is_duplicate
        input_feature["properties"]["is_partial_overlap"] = is_partial_overlap

        if (is_duplicate or is_partial_overlap) and remove_conflated is True:
            continue

        return_features.append(input_feature)

    return return_features
3 changes: 2 additions & 1 deletion src/backend/app/helpers/helper_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,10 @@ async def convert_odk_submission_json_to_geojson_wrapper(

contents = await json_file.read()
submission_geojson = await convert_odk_submission_json_to_geojson(BytesIO(contents))
submission_data = BytesIO(json.dumps(submission_geojson).encode("utf-8"))

headers = {"Content-Disposition": f"attachment; filename={filename.stem}.geojson"}
return Response(submission_geojson.getvalue(), headers=headers)
return Response(submission_data.getvalue(), headers=headers)


@router.get("/view-raw-data-api-token")
Expand Down
48 changes: 47 additions & 1 deletion src/backend/app/submissions/submission_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@
# from osm_fieldwork.json2osm import json2osm
from sqlalchemy.orm import Session

from app.central.central_crud import get_odk_form, get_odk_project, list_odk_xforms
from app.central.central_crud import (
get_odk_form,
get_odk_project,
list_odk_xforms,
)
from app.config import settings
from app.db import db_models
from app.models.enums import HTTPStatus
Expand Down Expand Up @@ -524,3 +528,45 @@ async def get_submission_detail(
odk_form.getSubmissions(project.odkid, db_xform.odk_form_id, submission_id)
)
return submission.get("value", [])[0]


# FIXME: this might not be needed
# async def get_submission_geojson(
# project_id: int,
# db: Session,
# ):
# """Retrieve GeoJSON data for a submission associated with a project.

# Args:
# project_id (int): The ID of the project.
# db (Session): The database session.

# Returns:
# FeatCol: A GeoJSON FeatCol containing the submission features.
# """
# data = await get_submission_by_project(project_id, {}, db)
# submission_json = data.get("value", [])

# if not submission_json:
# raise HTTPException(
# status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
# detail="Loading JSON submission failed",
# )

# all_features = []
# for submission in submission_json:
# keys_to_remove = ["meta", "__id", "__system"]
# for key in keys_to_remove:
# submission.pop(key)

# data = {}
# flatten_json(submission, data)

# geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
# data.pop("xlocation", {}), geom_type="Polygon"
# )

# feature = geojson.Feature(geometry=geojson_geom, properties=data)
# all_features.append(feature)

# return geojson.FeatureCollection(features=all_features)
71 changes: 48 additions & 23 deletions src/backend/app/submissions/submission_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@
from app.auth.roles import mapper, project_manager
from app.central import central_crud
from app.db import database, db_models, postgis_utils
from app.models.enums import HTTPStatus, ReviewStateEnum
from app.models.enums import ReviewStateEnum
from app.projects import project_crud, project_deps
from app.submissions import submission_crud, submission_schemas
from app.tasks.task_deps import get_task_by_id

router = APIRouter(
prefix="/submission",
Expand Down Expand Up @@ -558,32 +559,56 @@ async def download_submission_geojson(
data = await submission_crud.get_submission_by_project(project_id, {}, db)
submission_json = data.get("value", [])

if not submission_json:
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Loading JSON submission failed",
)
submission_geojson = await central_crud.convert_odk_submission_json_to_geojson(
submission_json
)
submission_data = BytesIO(json.dumps(submission_geojson).encode("utf-8"))
filename = project.project_name_prefix

all_features = []
for submission in submission_json:
keys_to_remove = ["meta", "__id", "__system"]
for key in keys_to_remove:
submission.pop(key)
headers = {"Content-Disposition": f"attachment; filename={filename}.geojson"}

data = {}
central_crud.flatten_json(submission, data)
return Response(submission_data.getvalue(), headers=headers)

geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
data.pop("xlocation", {}), geom_type="Polygon"
)

feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)
@router.get("/conflate_submission_geojson/")
async def conflate_geojson(
task_id: int,
current_user: dict = Depends(mapper), # FIXME change this validator
remove_conflated=False,
db: Session = Depends(database.get_db),
):
"""Conflates the input GeoJSON with OpenStreetMap data.
featcol = geojson.FeatureCollection(features=all_features)
submission_geojson = BytesIO(json.dumps(featcol).encode("utf-8"))
filename = project.project_name_prefix
Args:
task_id(int): task index of project.
current_user(dict): Check if user is mapper.
remove_conflated(bool): returns geojson which are not overlapped with osm data.
db (Session): The database session.
headers = {"Content-Disposition": f"attachment; filename={filename}.geojson"}
Returns:
str: Updated GeoJSON string with conflated features.
"""
try:
project = current_user["project"]
db_task = await get_task_by_id(project.id, task_id, db)
task_aoi = postgis_utils.geometry_to_geojson(db_task.outline)
task_geojson = geojson.dumps(task_aoi, indent=2)

data = await submission_crud.get_submission_by_project(project.id, {}, db)
submission_json = data.get("value", [])

return Response(submission_geojson.getvalue(), headers=headers)
submission_geojson = await central_crud.convert_odk_submission_json_to_geojson(
submission_json
)
form_category = project.xform_category
input_features = submission_geojson["features"]
osm_features = postgis_utils.get_osm_geometries(form_category, task_geojson)
submission_geojson["features"] = postgis_utils.conflate_features(
input_features, osm_features.get("features", []), remove_conflated
)

return submission_geojson
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to process conflation: {str(e)}"
) from e
Loading

0 comments on commit 477f810

Please sign in to comment.