Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature : Polygon Stats Including Data Completeness Metrics #181

Merged
merged 4 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions API/api_worker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import pathlib
import re
Expand All @@ -10,7 +11,7 @@
import sozipfile.sozipfile as zipfile
from celery import Celery

from src.app import RawData, S3FileTransfer
from src.app import PolygonStats, RawData, S3FileTransfer
from src.config import ALLOW_BIND_ZIP_FILTER
from src.config import CELERY_BROKER_URL as celery_broker_uri
from src.config import CELERY_RESULT_BACKEND as celery_backend
Expand Down Expand Up @@ -68,6 +69,15 @@ def process_raw_data(self, params):
file_parts
)
inside_file_size = 0
polygon_stats = None
if "include_stats" in params:
if params.include_stats:
feature = {
"type": "Feature",
"geometry": json.loads(params.geometry.json()),
"properties": {},
}
polygon_stats = PolygonStats(feature).get_summary_stats()
if bind_zip:
logging.debug("Zip Binding Started !")
# saving file in temp directory instead of memory so that zipping file will not eat memory
Expand All @@ -93,7 +103,11 @@ def process_raw_data(self, params):
# Adding metadata readme.txt
readme_content = f"Exported Timestamp (UTC{utc_offset}): {utc_now.strftime('%Y-%m-%d %H:%M:%S')}\n"
readme_content += "Exported through Raw-data-api (https://github.com/hotosm/raw-data-api) using OpenStreetMap data.\n"
readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about"
readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about \n"
if polygon_stats:
readme_content += f'{polygon_stats["summary"]["building"]}\n'
readme_content += f'{polygon_stats["summary"]["road"]}\n'
readme_content += "Read about what this summary means: indicators: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md,metrics: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md"

zf.writestr("Readme.txt", readme_content)

Expand Down Expand Up @@ -156,14 +170,17 @@ def process_raw_data(self, params):
logging.info(
f"Done Export : {exportname} of {round(inside_file_size/1000000)} MB / {geom_area} sqkm in {response_time_str}"
)
return {
final_response = {
"download_url": download_url,
"file_name": params.file_name,
"process_time": response_time_str,
"query_area": f"{round(geom_area,2)} Sq Km",
"binded_file_size": f"{round(inside_file_size/1000000,2)} MB",
"zip_file_size_bytes": zip_file_size,
}
if polygon_stats:
final_response["stats"] = polygon_stats
return final_response

except Exception as ex:
raise ex
Expand Down
7 changes: 7 additions & 0 deletions API/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from slowapi.errors import RateLimitExceeded

from src.config import (
ENABLE_POLYGON_STATISTICS_ENDPOINTS,
EXPORT_PATH,
LIMITER,
LOG_LEVEL,
Expand All @@ -42,6 +43,9 @@
from .raw_data import router as raw_data_router
from .tasks import router as tasks_router

if ENABLE_POLYGON_STATISTICS_ENDPOINTS:
from .stats import router as stats_router

# only use sentry if it is specified in config blocks
if SENTRY_DSN:
sentry_sdk.init(
Expand All @@ -62,6 +66,9 @@
app.include_router(auth_router)
app.include_router(raw_data_router)
app.include_router(tasks_router)
if ENABLE_POLYGON_STATISTICS_ENDPOINTS:
app.include_router(stats_router)

app.openapi = {
"info": {
"title": "Raw Data API",
Expand Down
27 changes: 27 additions & 0 deletions API/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from fastapi import APIRouter, Request
from fastapi_versioning import version

from src.app import PolygonStats
from src.config import LIMITER as limiter
from src.config import POLYGON_STATISTICS_API_RATE_LIMIT
from src.validation.models import StatsRequestParams

router = APIRouter(prefix="/stats", tags=["Stats"])


@router.post("/polygon/")
@limiter.limit(f"{POLYGON_STATISTICS_API_RATE_LIMIT}/minute")
@version(1)
async def get_polygon_stats(request: Request, params: StatsRequestParams):
    """Return summary statistics for the polygon supplied in the request body.

    Args:
        request (Request): The incoming HTTP request. It is not read in the
            body of this function.
        params (StatsRequestParams): Request parameters; ``params.geometry``
            is handed to ``PolygonStats``.

    Returns:
        dict | None: Whatever ``PolygonStats.get_summary_stats()`` produces,
        which is ``None`` when the statistics could not be obtained.
    """
    return PolygonStats(params.geometry).get_summary_stats()
7 changes: 6 additions & 1 deletion docs/src/installation/configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ The following are the different configuration options that are accepted.
| `USE_CONNECTION_POOLING` | `USE_CONNECTION_POOLING` | `[API_CONFIG]` | `false` | Enable psycopg2 connection pooling | OPTIONAL |
| `ALLOW_BIND_ZIP_FILTER` | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | `true` | Enable zip compression for exports | OPTIONAL |
| `ENABLE_TILES` | `ENABLE_TILES` | `[API_CONFIG]` | `false` | Enable Tile Output (Pmtiles and Mbtiles) | OPTIONAL |
| `INDEX_THRESHOLD` | `INDEX_THRESHOLD` | `[API_CONFIG]` | `5000` | Area in sqkm to apply grid/country index filter | OPTIONAL |
| `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | `False` | Enable the endpoints that return polygon statistics (approximate building count and road length) for the passed polygon | OPTIONAL |
| `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | `None` | API URL used to fetch the polygon statistics metadata. Currently tested with the GraphQL query endpoint of Kontur. Only required when `ENABLE_POLYGON_STATISTICS_ENDPOINTS` is enabled | OPTIONAL |
| `POLYGON_STATISTICS_API_RATE_LIMIT` | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | `5` | Rate limit applied to the statistics endpoint, in requests per minute. Defaults to 5 requests per minute | OPTIONAL |
| `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL |
| `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the result backend | OPTIONAL |
| `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL |
Expand Down Expand Up @@ -89,6 +91,9 @@ The following are the different configuration options that are accepted.
| `ENABLE_TILES` | `[API_CONFIG]` | Yes | Yes |
| `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | Yes | Yes |
| `INDEX_THRESHOLD` | `[API_CONFIG]` | No | Yes |
| `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | Yes | No |
| `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | Yes | No |
| `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | Yes | No |
| `CELERY_BROKER_URL` | TBD | Yes | Yes |
| `CELERY_RESULT_BACKEND` | TBD | Yes | Yes |
| `FILE_UPLOAD_METHOD` | TBD | Yes | Yes |
Expand Down
11 changes: 11 additions & 0 deletions docs/src/stats/indicators.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/src/stats/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
| Metric | definition | source | example | detailed explanation |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
| osmbuildingsgappercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no buildings in the OSM dataset | populated_area_km2,<br>building_count | {name:"percentageXWhereNoY", id:"osmBuildingGapsPercentage", x:"populated_area_km2", y:"building_count"} | detailed information about the indicators used can be taken from the "indicators" sheet |
| antiqueosmbuildingpercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no buildings in the OSM dataset that have been modified or created in the last 6 months | populated_area_km2,<br>building_count_6_months | {name:"percentageXWhereNoY", id:"antiqueOsmBuildingsPercentage", x:"populated_area_km2", y:"building_count_6_months"} | detailed information about the indicators used can be taken from the "indicators" sheet |
| osmroadsgappercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no highways in the OSM dataset | populated_area_km2,<br>highway_length | {name:"percentageXWhereNoY", id:"osmRoadGapsPercentage", x:"populated_area_km2", y:"highway_length"} | detailed information about the indicators used can be taken from the "indicators" sheet |
| antiqueosmroadspercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no highways in the OSM dataset that have been modified or created in the last 6 months | populated_area_km2,<br>highway_length_6_months | {name:"percentageXWhereNoY", id:"antiqueOsmRoadsPercentage", x:"populated_area_km2", y:"highway_length_6_months"} | detailed information about the indicators used can be taken from the "indicators" sheet |
179 changes: 178 additions & 1 deletion src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@
# 1100 13th Street NW Suite 800 Washington, D.C. 20005
# <[email protected]>
"""Page contains Main core logic of app"""

import os
import subprocess
import sys
import threading
import time
from datetime import datetime
from json import dumps
from json import loads as json_loads

import boto3
import humanize
import orjson
import requests
from area import area
from fastapi import HTTPException
from geojson import FeatureCollection
Expand All @@ -43,13 +45,15 @@
)
from src.config import EXPORT_PATH as export_path
from src.config import INDEX_THRESHOLD as index_threshold
from src.config import POLYGON_STATISTICS_API_URL
from src.config import USE_CONNECTION_POOLING as use_connection_pooling
from src.config import get_db_connection_params, level
from src.config import logger as logging
from src.query_builder.builder import (
check_exisiting_country,
check_last_updated_rawdata,
extract_geometry_type_query,
generate_polygon_stats_graphql_query,
get_countries_query,
get_country_geojson,
get_country_id_query,
Expand Down Expand Up @@ -885,3 +889,176 @@ def upload(self, file_path, file_name, file_suffix="zip"):
f"""https://s3.{bucket_location}.amazonaws.com/{BUCKET_NAME}/{file_name}"""
)
return object_url


class PolygonStats:
    """Generate data-completeness statistics and summaries for a polygon.

    Raw numbers are requested from the GraphQL endpoint configured in
    ``POLYGON_STATISTICS_API_URL`` and turned into human-readable statements
    about OpenStreetMap building and road coverage.
    """

    # Metric ids read from the API response. Any id the response omits
    # defaults to 0 so one missing metric cannot fail the whole summary.
    _EXPECTED_METRICS = (
        "population",
        "populatedAreaKm2",
        "averageEditTime",
        "lastEditTime",
        "osmBuildingsCount",
        "osmUsersCount",
        "aiBuildingsCountEstimation",
        "aiRoadCountEstimation",
        "building_count_6_months",
        "highway_length",
        "highway_length_6_months",
    )

    def __init__(self, geojson):
        """
        Initialize PolygonStats with the provided GeoJSON.

        Args:
            geojson (dict): GeoJSON representation of the polygon. It is
                serialized with ``json.dumps`` and kept as a string.
        """
        self.API_URL = POLYGON_STATISTICS_API_URL
        self.INPUT_GEOM = dumps(geojson)

    @staticmethod
    def _percentage(part, whole):
        """
        Return ``part`` as a rounded percentage of ``whole``.

        Args:
            part (int | float): Numerator.
            whole (int | float | None): Denominator.

        Returns:
            int | None: Rounded percentage, or None when ``whole`` is zero or
            missing (the ratio is undefined in that case).
        """
        if not whole:
            return None
        return round((part / whole) * 100)

    @staticmethod
    def _extract_functions(analytics_data):
        """
        Walk the response down to ``data.polygonStatistic.analytics.functions``.

        Args:
            analytics_data (dict | None): Decoded JSON response.

        Returns:
            The value stored under ``functions``, or None when any level of
            the path is missing or is not a JSON object (for example
            ``"data": null``).
        """
        node = analytics_data
        for key in ("data", "polygonStatistic", "analytics", "functions"):
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    @staticmethod
    def get_building_pattern_statement(
        osm_building_count,
        ai_building_count,
        avg_timestamp,
        osm_building_count_6_months,
    ):
        """
        Translates building stats to a human-readable statement.

        Sentences whose percentage cannot be computed (zero denominator) are
        left out instead of raising ``ZeroDivisionError``.

        Args:
            osm_building_count (int): Count of buildings from OpenStreetMap.
            ai_building_count (int): Count of buildings from AI estimates.
            avg_timestamp: Average timestamp of data; interpolated as-is.
            osm_building_count_6_months (int): Count of buildings updated in the last 6 months.

        Returns:
            str: Human-readable building statement.
        """
        completeness = PolygonStats._percentage(osm_building_count, ai_building_count)
        # Share of the OSM buildings themselves that are recent, matching
        # how the road statement is computed.
        recent = PolygonStats._percentage(
            osm_building_count_6_months, osm_building_count
        )

        building_statement = f"OpenStreetMap contains {humanize.intword(osm_building_count)} buildings in this dataset."
        if completeness is not None:
            building_statement += f" Based on AI-mapped estimates, this is approximately {completeness}% of the total buildings in the region."
        building_statement += (
            f" The average age of data for this region is {avg_timestamp}"
        )
        if recent is not None:
            building_statement += (
                f", and {recent}% buildings were added or updated in the last 6 months."
            )
        else:
            building_statement += "."
        return building_statement

    @staticmethod
    def get_road_pattern_statement(
        osm_highway_length,
        ai_highway_length,
        avg_timestamp,
        osm_highway_length_6_months,
    ):
        """
        Translates road stats to a human-readable statement.

        Sentences whose percentage cannot be computed (zero denominator) are
        left out instead of raising ``ZeroDivisionError``.

        Args:
            osm_highway_length (float): Length of roads from OpenStreetMap.
            ai_highway_length (float): Length of roads from AI estimates.
            avg_timestamp: Average timestamp of data; interpolated as-is.
            osm_highway_length_6_months (float): Length of roads updated in the last 6 months.

        Returns:
            str: Human-readable road statement.
        """
        completeness = PolygonStats._percentage(osm_highway_length, ai_highway_length)
        recent = PolygonStats._percentage(
            osm_highway_length_6_months, osm_highway_length
        )

        road_statement = f"OpenStreetMap contains {humanize.intword(osm_highway_length)} km of roads in this dataset."
        if completeness is not None:
            road_statement += f" Based on AI-mapped estimates, this is approximately {completeness} % of the total road length in the dataset region."
        road_statement += f" The average age of data for the region is {avg_timestamp}"
        if recent is not None:
            road_statement += (
                f", and {recent}% of roads were added or updated in the last 6 months."
            )
        else:
            road_statement += "."
        return road_statement

    def get_osm_analytics_meta_stats(self):
        """
        Requests the raw stats for the polygon from the configured statistics API.

        Returns:
            dict | None: Decoded JSON response, or None when building the
            query, sending the request, or decoding the response fails.
        """
        try:
            query = generate_polygon_stats_graphql_query(self.INPUT_GEOM)
            payload = {"query": query}
            response = requests.post(self.API_URL, json=payload, timeout=20)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            return response.json()
        except Exception as e:
            # Deliberately broad: statistics are best-effort, so a failure
            # here is reported to the caller as None, not propagated.
            logging.error(f"Request failed: {e}")
            return None

    def get_summary_stats(self):
        """
        Generates summary statistics for buildings and roads.

        Returns:
            dict | None: Dictionary with ``summary`` (building and road
            statements), ``raw`` (individual metrics) and ``meta``
            (documentation links), or None when the API response is missing
            or does not contain any analytics functions.
        """
        functions = self._extract_functions(self.get_osm_analytics_meta_stats())
        if not functions:
            return None

        combined_data = dict.fromkeys(self._EXPECTED_METRICS, 0)
        for function in functions:
            result = function.get("result")
            combined_data[function.get("id")] = result if result is not None else 0

        average_edit_time = datetime.fromtimestamp(combined_data["averageEditTime"])
        last_edit_time = datetime.fromtimestamp(combined_data["lastEditTime"])

        building_summary = self.get_building_pattern_statement(
            combined_data["osmBuildingsCount"],
            combined_data["aiBuildingsCountEstimation"],
            average_edit_time,
            combined_data["building_count_6_months"],
        )

        road_summary = self.get_road_pattern_statement(
            combined_data["highway_length"],
            combined_data["aiRoadCountEstimation"],
            average_edit_time,
            combined_data["highway_length_6_months"],
        )

        return_stats = {
            "summary": {"building": building_summary, "road": road_summary},
            "raw": {
                "population": combined_data["population"],
                "populatedAreaKm2": combined_data["populatedAreaKm2"],
                "averageEditTime": average_edit_time.strftime("%Y-%m-%d %H:%M:%S"),
                "lastEditTime": last_edit_time.strftime("%Y-%m-%d %H:%M:%S"),
                "osmBuildingsCount": combined_data["osmBuildingsCount"],
                "osmHighwayLengthKm": combined_data["highway_length"],
                "osmUsersCount": combined_data["osmUsersCount"],
                "aiBuildingsCountEstimationKm": combined_data[
                    "aiBuildingsCountEstimation"
                ],
                "aiRoadCountEstimationKm": combined_data["aiRoadCountEstimation"],
                "buildingCount6Months": combined_data["building_count_6_months"],
                "highwayLength6Months": combined_data["highway_length_6_months"],
            },
            "meta": {
                "indicators": "https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md",
                "metrics": "https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md",
            },
        }

        return return_stats
16 changes: 16 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,22 @@
)


### Polygon statistics which will deliver the stats of approx buildings/ roads in the area

# Environment variables are always strings, so "false"/"0"/"no"/"off" must be
# parsed explicitly; a plain truthiness check would treat them as enabled.
# An unset or empty variable falls back to the config file value.
_enable_polygon_stats_env = os.environ.get("ENABLE_POLYGON_STATISTICS_ENDPOINTS")
if _enable_polygon_stats_env:
    ENABLE_POLYGON_STATISTICS_ENDPOINTS = (
        _enable_polygon_stats_env.strip().lower() not in ("0", "false", "no", "off")
    )
else:
    ENABLE_POLYGON_STATISTICS_ENDPOINTS = config.getboolean(
        "API_CONFIG", "ENABLE_POLYGON_STATISTICS_ENDPOINTS", fallback=False
    )

# URL that polygon statistics are requested from; None when not configured.
POLYGON_STATISTICS_API_URL = os.environ.get("POLYGON_STATISTICS_API_URL") or config.get(
    "API_CONFIG", "POLYGON_STATISTICS_API_URL", fallback=None
)

# Requests per minute for the statistics endpoint. May be a str (environment
# or config file) or the int fallback; it is only interpolated into a string.
POLYGON_STATISTICS_API_RATE_LIMIT = os.environ.get(
    "POLYGON_STATISTICS_API_RATE_LIMIT"
) or config.get("API_CONFIG", "POLYGON_STATISTICS_API_RATE_LIMIT", fallback=5)


def get_db_connection_params() -> dict:
"""Return a python dict that can be passed to psycopg2 connections
to authenticate to Postgres Databases
Expand Down
Loading
Loading