diff --git a/API/api_worker.py b/API/api_worker.py index 167730ed..bf7ba391 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -1,3 +1,4 @@ +import json import os import pathlib import re @@ -10,7 +11,7 @@ import sozipfile.sozipfile as zipfile from celery import Celery -from src.app import RawData, S3FileTransfer +from src.app import PolygonStats, RawData, S3FileTransfer from src.config import ALLOW_BIND_ZIP_FILTER from src.config import CELERY_BROKER_URL as celery_broker_uri from src.config import CELERY_RESULT_BACKEND as celery_backend @@ -68,6 +69,15 @@ def process_raw_data(self, params): file_parts ) inside_file_size = 0 + polygon_stats = None + if "include_stats" in params: + if params.include_stats: + feature = { + "type": "Feature", + "geometry": json.loads(params.geometry.json()), + "properties": {}, + } + polygon_stats = PolygonStats(feature).get_summary_stats() if bind_zip: logging.debug("Zip Binding Started !") # saving file in temp directory instead of memory so that zipping file will not eat memory @@ -93,7 +103,11 @@ def process_raw_data(self, params): # Adding metadata readme.txt readme_content = f"Exported Timestamp (UTC{utc_offset}): {utc_now.strftime('%Y-%m-%d %H:%M:%S')}\n" readme_content += "Exported through Raw-data-api (https://github.com/hotosm/raw-data-api) using OpenStreetMap data.\n" - readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about" + readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about \n" + if polygon_stats: + readme_content += f'{polygon_stats["summary"]["building"]}\n' + readme_content += f'{polygon_stats["summary"]["road"]}\n' + readme_content += "Read about what this summary means: indicators: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md,metrics: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md" zf.writestr("Readme.txt", readme_content) 
@@ -156,7 +170,7 @@ def process_raw_data(self, params): logging.info( f"Done Export : {exportname} of {round(inside_file_size/1000000)} MB / {geom_area} sqkm in {response_time_str}" ) - return { + final_response = { "download_url": download_url, "file_name": params.file_name, "process_time": response_time_str, @@ -164,6 +178,9 @@ def process_raw_data(self, params): "binded_file_size": f"{round(inside_file_size/1000000,2)} MB", "zip_file_size_bytes": zip_file_size, } + if polygon_stats: + final_response["stats"] = polygon_stats + return final_response except Exception as ex: raise ex diff --git a/API/main.py b/API/main.py index aa6a2a92..4c9ed793 100644 --- a/API/main.py +++ b/API/main.py @@ -27,6 +27,7 @@ from slowapi.errors import RateLimitExceeded from src.config import ( + ENABLE_POLYGON_STATISTICS_ENDPOINTS, EXPORT_PATH, LIMITER, LOG_LEVEL, @@ -42,6 +43,9 @@ from .raw_data import router as raw_data_router from .tasks import router as tasks_router +if ENABLE_POLYGON_STATISTICS_ENDPOINTS: + from .stats import router as stats_router + # only use sentry if it is specified in config blocks if SENTRY_DSN: sentry_sdk.init( @@ -62,6 +66,9 @@ app.include_router(auth_router) app.include_router(raw_data_router) app.include_router(tasks_router) +if ENABLE_POLYGON_STATISTICS_ENDPOINTS: + app.include_router(stats_router) + app.openapi = { "info": { "title": "Raw Data API", diff --git a/API/stats.py b/API/stats.py new file mode 100644 index 00000000..8fa5ec24 --- /dev/null +++ b/API/stats.py @@ -0,0 +1,27 @@ +from fastapi import APIRouter, Request +from fastapi_versioning import version + +from src.app import PolygonStats +from src.config import LIMITER as limiter +from src.config import POLYGON_STATISTICS_API_RATE_LIMIT +from src.validation.models import StatsRequestParams + +router = APIRouter(prefix="/stats", tags=["Stats"]) + + +@router.post("/polygon/") +@limiter.limit(f"{POLYGON_STATISTICS_API_RATE_LIMIT}/minute") +@version(1) +async def get_polygon_stats(request: 
Request, params: StatsRequestParams): + """Get statistics for the specified polygon. + + Args: + request (Request): An HTTP request object. + params (StatsRequestParams): Parameters for the statistics request, including the polygon geometry. + + Returns: + dict: A dictionary containing statistics for the specified polygon. + """ + generator = PolygonStats(params.geometry) + + return generator.get_summary_stats() diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md index 122a2129..0b57d0d8 100644 --- a/docs/src/installation/configurations.md +++ b/docs/src/installation/configurations.md @@ -55,7 +55,9 @@ The following are the different configuration options that are accepted. | `USE_CONNECTION_POOLING` | `USE_CONNECTION_POOLING` | `[API_CONFIG]` | `false` | Enable psycopg2 connection pooling | OPTIONAL | | `ALLOW_BIND_ZIP_FILTER` | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | `true` | Enable zip compression for exports | OPTIONAL | | `ENABLE_TILES` | `ENABLE_TILES` | `[API_CONFIG]` | `false` | Enable Tile Output (Pmtiles and Mbtiles) | OPTIONAL | -| `INDEX_THRESHOLD` | `INDEX_THRESHOLD` | `[API_CONFIG]` | `5000` | Area in sqkm to apply grid/country index filter | OPTIONAL | +| `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | `False` | Enable the endpoints that return polygon statistics (approximate building count and road length) for the passed polygon | OPTIONAL | +| `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | `None` | API URL used to fetch the polygon statistics metadata; currently tested with the GraphQL query endpoint of Kontur. Only required if ENABLE_POLYGON_STATISTICS_ENDPOINTS is enabled | OPTIONAL | +| `POLYGON_STATISTICS_API_RATE_LIMIT` | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | `5` | Rate limit applied to the statistics endpoint, in requests per minute; defaults to 5 requests per minute | OPTIONAL | | `CELERY_BROKER_URL` | 
`CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL | | `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the the result backend | OPTIONAL | | `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL | @@ -89,6 +91,9 @@ The following are the different configuration options that are accepted. | `ENABLE_TILES` | `[API_CONFIG]` | Yes | Yes | | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | Yes | Yes | | `INDEX_THRESHOLD` | `[API_CONFIG]` | No | Yes | +| `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | Yes | No | +| `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | Yes | No | +| `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | Yes | No | | `CELERY_BROKER_URL` | TBD | Yes | Yes | | `CELERY_RESULT_BACKEND` | TBD | Yes | Yes | | `FILE_UPLOAD_METHOD` | TBD | Yes | Yes | diff --git a/docs/src/stats/indicators.md b/docs/src/stats/indicators.md new file mode 100644 index 00000000..6c883c51 --- /dev/null +++ b/docs/src/stats/indicators.md @@ -0,0 +1,11 @@ +Indicators | Full name | API variable | Source (copyrights) | Definition | Example | Methodology | Detailed Explanation +----------------------- | ------------------------------------ | ----------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +OSMbuildingscount | OSM: buildings count | building_count | [© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Total number of buildings in a given area according to OpenStreetMap. | {name:"sumX", id:"sumBuildingCount", x:"building_count"}
allows you to obtain the number of buildings in OSM for the requested area. | OSM building count dataset was built based on H3: Hexagonal Hierarchical Spatial Index. We count the number of buildings per each H3 hexagon at resolution 8. | osm objects with tag 'building' and not "building":"no" +aibuildingscount | Total buildings count | total_building_count | [© Kontur https://kontur.io/,
Copernicus Global Land Service: Land Cover 100m: Marcel Buchhorn, Bruno Smets, Luc Bertels, Bert De Roo, Myroslava Lesiv, Nandin-Erdene Tsendbazar, Martin Herold, Steffen Fritz. (2020). Copernicus Global Land Service: Land Cover 100m: collection 3: epoch 2019: Globe (Version V3.0.1) [Data set]. Zenodo. http://doi.org/10.5281/zenodo.3939050,
Geoalert Urban Mapping: Chechnya, Moscow region, Tyva - https://github.com/Geoalert/urban-mapping,
Microsoft Buildings: Australia, Canada, Tanzania, Uganda, USA: This data is licensed by Microsoft under the Open Data Commons Open Database License (ODbL).,
NZ Building Outlines data sourced from the LINZ Data Service - https://data.linz.govt.nz/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Estimated number of buildings in a given area based on various data sources. | {name:"sumX", id:"TotalBuildingCount", x:"total_building_count"}
allows you to obtain the estimated total number of buildings for the requested area. | Total building count dataset was built based on H3: Hexagonal Hierarchical Spatial Index. We take max the number of buildings from different sources (see list of sources) per each H3 hexagon at resolution 8. | For each hex, we compare data from all available sources and select the maximum value. +highway length count | OSM: road length | highway_length | [© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Total length of roads in a given area according to OpenStreetMap. | {name:"maxX", id:"maxHighwayLength", x:"highway_length"}
allows you to obtain the max value of OSM highway length per h3 hexagon at resolution 8 for the requested area. | OSM highway length dataset was built based on H3: Hexagonal Hierarchical Spatial Index. We count the length of all highways (osm objects with tag 'highway') per each H3 hexagon at resolution 8. | osm objects with tag 'highway' and not 'highway':'proposed' (to count only existing roads) and not 'highway':'dummy'
or with tag 'aeroway':'runway', 'aeroway':'taxiway', 'aeroway':'stopway' +ai highway length count | Total road length | total_road_length | [©2019 Facebook, Inc. and its affiliates https://github.com/facebookmicrosites/Open-Mapping-At-Facebook/blob/main/LICENSE.md
© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://github.com/facebookmicrosites/Open-Mapping-At-Facebook/blob/main/LICENSE.md) | Estimated total road length according to Meta (Facebook) AI and OpenStreetMap data. For places where Meta (Facebook) roads data are unavailable, the estimation is based on statistical regression from Kontur Population data. | {name:"maxX", id:"maxTotalRoadLength", x:"total_road_length"}
allows you to obtain the estimated max value of highway length per h3 hexagon at 8 resolution for the requested area. | The total road length dataset was constructed based on H3: Hexagonal Hierarchical Spatial Index. We augment OSM highway length with estimated road length according to Meta (Facebook) AI for each H3 hexagon at resolution 8. For locations where Meta (Facebook) road data is not available, the estimate is based on statistical regression on Kontur Population data. | More about AI estimation https://github.com/facebookmicrosites/Open-Mapping-At-Facebook/tree/main +building count 6 month | OSM: new buildings (last 6 months) | building_count_6_months | [© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Number of buildings mapped (edited or created) in OpenStreetMap in the last 6 months. | {name:"sumX", id:"sumBuildingCount6Months", x:"building_count_6_months"}
allows you to obtain the number of buildings that have been edited in the last 6 months for the requested area. | The OSM building count (last 6 months) dataset was constructed based on H3: hexagonal hierarchical spatial index. We count the number of buildings that have been edited or created in the last 6 months per H3 hex at resolution 8. | [osm objects with tag 'building' and not "building":"no" and time of the last modification from attributes of osm object < 6 month ago

Time of the last modification (timestamp) from attributes of osm object. More information - https://wiki.openstreetmap.org/wiki/Elements](https://wiki.openstreetmap.org/wiki/Elements) +highway count 6 month | OSM: new road length (last 6 months) | highway_length_6_months | [© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Length of roads mapped (edited or created) in OpenStreetMap in the last 6 months. | {name:"maxX", id:"maxHighwayLength6Months", x:"highway_length_6_months"}
allows you to obtain the max value of OSM highway length per h3 hexagon at resolution 8 for the requested area calculated from the objects that have been edited in the last 6 months. | OSM highway length (last 6 months) dataset was built based on H3: Hexagonal Hierarchical Spatial Index. We count the length of all highways (osm objects with tag 'highway') that have been edited or created in the last 6 months per H3 hex at resolution 8. | [osm objects with tag 'highway' and not 'highway':'proposed' (to count only existing roads) and not 'highway':'dummy'
or with tag 'aeroway':'runway', 'aeroway':'taxiway', 'aeroway':'stopway'
and time of the last modification (timestamp) from attributes of osm object < 6 month ago

Time of the last modification (timestamp) from attributes of osm object. More information - https://wiki.openstreetmap.org/wiki/Elements](https://wiki.openstreetmap.org/wiki/Elements) +average edit time | OSM: last edit (avg) | avgmax_ts | [© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](https://www.openstreetmap.org/copyright) | Average of latest OpenStreetMap edit dates in a given area. | {name:"avgX", id:"AverageMaxTimestamp", x:"avgmax_ts"}
allows you to obtain the average last edit date for all OSM objects for the requested area. | Average of latest OpenStreetMap dataset was constructed based on H3: Hexagonal Hierarchical Spatial Index. The timestamps are stored as UNIX Epoch timestamps in seconds. We are only counting the timestamps on tagged OpenStreetMap objects that form geometry objects when processed by Osmium utility (ways, nodes, relations) for each H3 hexagon at resolution 8. The untagged objects are skipped to avoid statistical skew. | [Time of the last modification (timestamp) from attributes of osm object. More information - https://wiki.openstreetmap.org/wiki/Elements](https://wiki.openstreetmap.org/wiki/Elements) +population | Population | population | [Facebook High Resolution Settlement data: Facebook Connectivity Lab and Center for International Earth Science Information Network - CIESIN - Columbia University. 2016. High Resolution Settlement Layer (HRSL). Source imagery for HRSL © 2016 DigitalGlobe. Licence - Creative Commons Attribution International,
Global Human Settlement Layer: Dataset: Schiavina, Marcello; Freire, Sergio; MacManus, Kytt (2019): GHS population grid multitemporal (1975, 1990, 2000, 2015) R2019A. European Commission, Joint Research Centre (JRC) DOI: 10.2905/42E8BE89-54FF-464E-BE7B-BF9E64DA5218 PID: http://data.europa.eu/89h/0c6b9751-a71f-4062-830b-43c9f432370f Concept & Methodology: Freire, Sergio; MacManus, Kytt; Pesaresi, Martino; Doxsey-Whitfield, Erin; Mills, Jane (2016): Development of new open and free multi-temporal global population grids at 250 m resolution. Geospatial Data in a Changing World; Association of Geographic Information Laboratories in Europe (AGILE). AGILE 2016.
Copernicus Global Land Service: Land Cover 100m: Marcel Buchhorn, Bruno Smets, Luc Bertels, Bert De Roo, Myroslava Lesiv, Nandin-Erdene Tsendbazar, Martin Herold, Steffen Fritz. (2020). Copernicus Global Land Service: Land Cover 100m: collection 3: epoch 2019: Globe (Version V3.0.1) [Data set]. Zenodo. http://doi.org/10.5281/zenodo.3939050,
Microsoft Buildings: This data is licensed by Microsoft under the Open Data Commons Open Database License (ODbL).,
Land Information New Zealand (LINZ) NZ Building Outlines: sourced from the LINZ Data Service licensed for reuse under CC BY 4.0.,
© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](http://doi.org/10.5281/zenodo.3939050) | Number of people living in a given area according to Kontur Population dataset. The dataset was produced by overlaying the Global Human Settlement Layer (GHSL) with available Facebook population data and constraining known artifacts using OpenStreetMap data. | {name:"sumX", id:"sumPopulation", x:"population"}
allows you to obtain the sum of population for the requested area calculated from Kontur Population dataset. | Kontur Population dataset was built based on H3: Hexagonal Hierarchical Spatial Index. Calculating population is based on overlapping Global Human Settlement Layer (GHSL) with Facebook High Resolution Settlement Layer (HRSL) population data where available. Known artifacts of both datasets are constrained using OpenStreetMap data as a hint. Quarries and big roads are marked unpopulated, as they are often falsely detected as populated in GHSL. Lakes, rivers, glaciers, sands, forests, and other similar areas are also marked as unpopulated. We use Microsoft Building Footprint data, Land Information New Zealand and Copernicus Global Land Service: Land Cover 100m to improve the accuracy of population distribution. Building presence, or otherwise built-up area, implies there’s someone on the ground, which is often missed in HRSL data for Africa. We use scale coefficient taking into account the nesting of levels of administrative division in cases when the total population of the administrative area significantly differs from the official population data in OpenStreetMap. Overly hot pixels like "half a million people in a quarter of square kilometer" are spread out to more realistic surroundings. The population is shifted to neighboring cells to satisfy constraints. Non-integer populations are fixed up by "gluing people back together" to keep realistic human beings. | [The datasets is available here: https://data.humdata.org/dataset/kontur-population-dataset](https://data.humdata.org/dataset/kontur-population-dataset) +populated area | Populated area | populated_area_km2 | [Facebook High Resolution Settlement data: Facebook Connectivity Lab and Center for International Earth Science Information Network - CIESIN - Columbia University. 2016. High Resolution Settlement Layer (HRSL). Source imagery for HRSL © 2016 DigitalGlobe. 
Licence - Creative Commons Attribution International,
Global Human Settlement Layer: Dataset: Schiavina, Marcello; Freire, Sergio; MacManus, Kytt (2019): GHS population grid multitemporal (1975, 1990, 2000, 2015) R2019A. European Commission, Joint Research Centre (JRC) DOI: 10.2905/42E8BE89-54FF-464E-BE7B-BF9E64DA5218 PID: http://data.europa.eu/89h/0c6b9751-a71f-4062-830b-43c9f432370f Concept & Methodology: Freire, Sergio; MacManus, Kytt; Pesaresi, Martino; Doxsey-Whitfield, Erin; Mills, Jane (2016): Development of new open and free multi-temporal global population grids at 250 m resolution. Geospatial Data in a Changing World; Association of Geographic Information Laboratories in Europe (AGILE). AGILE 2016.
Copernicus Global Land Service: Land Cover 100m: Marcel Buchhorn, Bruno Smets, Luc Bertels, Bert De Roo, Myroslava Lesiv, Nandin-Erdene Tsendbazar, Martin Herold, Steffen Fritz. (2020). Copernicus Global Land Service: Land Cover 100m: collection 3: epoch 2019: Globe (Version V3.0.1) [Data set]. Zenodo. http://doi.org/10.5281/zenodo.3939050,
Microsoft Buildings: This data is licensed by Microsoft under the Open Data Commons Open Database License (ODbL).,
Land Information New Zealand (LINZ) NZ Building Outlines: sourced from the LINZ Data Service licensed for reuse under CC BY 4.0.,
© Kontur https://kontur.io/,
© OpenStreetMap contributors https://www.openstreetmap.org/copyright](http://doi.org/10.5281/zenodo.3939050) | The sum of the areas of populated hexagons (with population > 0) in square kilometers | {name:"sumX", id:"sumPopulatedArea", x:"populated_area_km2"}
allows you to obtain the sum of populated area for the requested area calculated from Kontur Population dataset. | Populated Area dataset was based on Kontur Population dataset and H3: Hexagonal Hierarchical Spatial Index. We calculate the sum of the areas of hexagons with population > 0 in square kilometers | [More about source dataset - The datasets is available here: https://data.humdata.org/dataset/kontur-population-dataset](https://data.humdata.org/dataset/kontur-population-dataset) \ No newline at end of file diff --git a/docs/src/stats/metrics.md b/docs/src/stats/metrics.md new file mode 100644 index 00000000..a8b16010 --- /dev/null +++ b/docs/src/stats/metrics.md @@ -0,0 +1,6 @@ +| Metric | definition | source | example | detailed explanation | +| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| osmbuildingsgappercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no buildings in the OSM dataset | populated_area_km2,
building_count | {name:"percentageXWhereNoY", id:"osmBuildingGapsPercentage", x:"populated_area_km2", y:"building_count"} | detailed information about the indicators used can be taken from the "indicators" sheet | +| antiqueosmbuildingpercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no buildings in the OSM dataset that have been modified or created in the last 6 months | populated_area_km2,
building_count_6_months | {name:"percentageXWhereNoY", id:"antiqueOsmBuildingsPercentage", x:"populated_area_km2", y:"building_count_6_months"} | detailed information about the indicators used can be taken from the "indicators" sheet | +| osmroadsgappercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no highways in the OSM dataset | populated_area_km2,
highway_length | {name:"percentageXWhereNoY", id:"osmRoadGapsPercentage", x:"populated_area_km2", y:"highway_length"} | detailed information about the indicators used can be taken from the "indicators" sheet | +| antiqueosmroadspercent | This metric is based on H3: Hexagonal Hierarchical Spatial Index. This metric shows the percentage of populated h3 hexagons at 8 resolution (population > 0) for which there are no highways in the OSM dataset that have been modified or created in the last 6 months | populated_area_km2,
class PolygonStats:
    """Generates stats for polygon.

    Sends the polygon to the configured statistics GraphQL endpoint
    (``POLYGON_STATISTICS_API_URL``) and turns the returned analytics
    functions into human-readable statements plus a dictionary of raw values.
    """

    def __init__(self, geojson):
        """
        Initialize PolygonStats with the provided GeoJSON.

        Args:
            geojson (dict): GeoJSON representation of the polygon. It is
                serialised with ``json.dumps`` here, so it must be
                JSON-serialisable.
        """
        self.API_URL = POLYGON_STATISTICS_API_URL
        self.INPUT_GEOM = dumps(geojson)

    @staticmethod
    def _percentage(part, whole, default=0):
        """
        Return ``part`` as a rounded percentage of ``whole``.

        Args:
            part (int | float): Numerator.
            whole (int | float): Denominator.
            default (int): Value returned when ``whole`` is zero, so callers
                never hit a ZeroDivisionError on empty regions.

        Returns:
            int: Rounded percentage, or ``default`` when ``whole`` is zero.
        """
        if not whole:
            return default
        return round((part / whole) * 100)

    @staticmethod
    def _extract_functions(analytics_data):
        """
        Walk the response down to ``data.polygonStatistic.analytics.functions``.

        Args:
            analytics_data (dict | None): Decoded JSON response.

        Returns:
            list | None: The functions list, or None when any level of the
            path is missing, null, or not a dictionary.
        """
        node = analytics_data
        for key in ("data", "polygonStatistic", "analytics", "functions"):
            # A null intermediate node (e.g. "data": null on a GraphQL error)
            # must be treated as "no statistics", not as a crash.
            if not isinstance(node, dict) or node.get(key) is None:
                return None
            node = node[key]
        return node

    @staticmethod
    def get_building_pattern_statement(
        osm_building_count,
        ai_building_count,
        avg_timestamp,
        osm_building_count_6_months,
    ):
        """
        Translates building stats to a human-readable statement.

        Args:
            osm_building_count (int): Count of buildings from OpenStreetMap.
            ai_building_count (int): Count of buildings from AI estimates.
            avg_timestamp (str): Average timestamp of data.
            osm_building_count_6_months (int): Count of buildings updated in the last 6 months.

        Returns:
            str: Human-readable building statement.
        """
        # With no AI estimate to compare against, coverage is reported as
        # 100%, matching the completeness convention in get_summary_stats.
        coverage = PolygonStats._percentage(
            osm_building_count, ai_building_count, default=100
        )
        # NOTE(review): the recent-edit share is taken against the AI estimate
        # here but against the OSM total for roads - confirm which is intended.
        recent = PolygonStats._percentage(
            osm_building_count_6_months, ai_building_count
        )
        building_statement = (
            f"OpenStreetMap contains {humanize.intword(osm_building_count)} buildings in this dataset. "
            f"Based on AI-mapped estimates, this is approximately {coverage}% of the total buildings in the region. "
            f"The average age of data for this region is {avg_timestamp}, "
            f"and {recent}% buildings were added or updated in the last 6 months."
        )
        return building_statement

    @staticmethod
    def get_road_pattern_statement(
        osm_highway_length,
        ai_highway_length,
        avg_timestamp,
        osm_highway_length_6_months,
    ):
        """
        Translates road stats to a human-readable statement.

        Args:
            osm_highway_length (float): Length of roads from OpenStreetMap.
            ai_highway_length (float): Length of roads from AI estimates.
            avg_timestamp (str): Average timestamp of data.
            osm_highway_length_6_months (float): Length of roads updated in the last 6 months.

        Returns:
            str: Human-readable road statement.
        """
        coverage = PolygonStats._percentage(
            osm_highway_length, ai_highway_length, default=100
        )
        recent = PolygonStats._percentage(
            osm_highway_length_6_months, osm_highway_length
        )
        road_statement = (
            f"OpenStreetMap contains {humanize.intword(osm_highway_length)} km of roads in this dataset. "
            f"Based on AI-mapped estimates, this is approximately {coverage} % of the total road length in the dataset region. "
            f"The average age of data for the region is {avg_timestamp}, "
            f"and {recent}% of roads were added or updated in the last 6 months."
        )
        return road_statement

    def get_osm_analytics_meta_stats(self):
        """
        Gets the raw stats translated into a JSON body using the OSM Analytics API.

        Returns:
            dict | None: Decoded JSON response, or None when building the
            query, the HTTP request, or decoding the response fails.
        """
        try:
            query = generate_polygon_stats_graphql_query(self.INPUT_GEOM)
            payload = {"query": query}
            response = requests.post(self.API_URL, json=payload, timeout=20)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            return response.json()
        except Exception as ex:
            # Statistics are best-effort: log through the application logger
            # (instead of print) and let callers carry on without them.
            logging.error("Polygon statistics request failed: %s", ex)
            return None

    def get_summary_stats(self):
        """
        Generates summary statistics for buildings and roads.

        Returns:
            dict | None: Dictionary with ``summary`` (building and road
            statements), ``raw`` (selected raw values) and ``meta``
            (documentation links); None when the statistics could not be
            fetched or the response holds no analytics functions.
        """
        functions = self._extract_functions(self.get_osm_analytics_meta_stats())
        if not functions:
            return None

        # Index results by function id; a null result is treated as 0.
        combined_data = {}
        for function in functions:
            result = function.get("result")
            combined_data[function.get("id")] = result if result is not None else 0

        combined_data["osm_buildings_freshness_percentage"] = (
            100 - combined_data["antiqueOsmBuildingsPercentage"]
        )

        # No AI estimate means nothing to compare against: report 100 rather
        # than dividing by zero.
        ai_buildings = combined_data["aiBuildingsCountEstimation"]
        combined_data["osm_building_completeness_percentage"] = (
            (combined_data["osmBuildingsCount"] / ai_buildings) * 100
            if ai_buildings
            else 100
        )

        combined_data["osm_roads_freshness_percentage"] = (
            100 - combined_data["antiqueOsmRoadsPercentage"]
        )
        combined_data["osm_roads_completeness_percentage"] = (
            100 - combined_data["osmRoadGapsPercentage"]
        )

        # Edit times are passed to datetime.fromtimestamp, which yields naive
        # datetimes in the server's local time zone.
        combined_data["averageEditTime"] = datetime.fromtimestamp(
            combined_data["averageEditTime"]
        )
        combined_data["lastEditTime"] = datetime.fromtimestamp(
            combined_data["lastEditTime"]
        )

        building_summary = self.get_building_pattern_statement(
            combined_data["osmBuildingsCount"],
            combined_data["aiBuildingsCountEstimation"],
            combined_data["averageEditTime"],
            combined_data["building_count_6_months"],
        )

        road_summary = self.get_road_pattern_statement(
            combined_data["highway_length"],
            combined_data["aiRoadCountEstimation"],
            combined_data["averageEditTime"],
            combined_data["highway_length_6_months"],
        )

        return_stats = {
            "summary": {"building": building_summary, "road": road_summary},
            "raw": {
                "population": combined_data["population"],
                "populatedAreaKm2": combined_data["populatedAreaKm2"],
                "averageEditTime": combined_data["averageEditTime"].strftime(
                    "%Y-%m-%d %H:%M:%S"
                ),
                "lastEditTime": combined_data["lastEditTime"].strftime(
                    "%Y-%m-%d %H:%M:%S"
                ),
                "osmBuildingsCount": combined_data["osmBuildingsCount"],
                "osmHighwayLengthKm": combined_data["highway_length"],
                "osmUsersCount": combined_data["osmUsersCount"],
                "aiBuildingsCountEstimationKm": combined_data[
                    "aiBuildingsCountEstimation"
                ],
                "aiRoadCountEstimationKm": combined_data["aiRoadCountEstimation"],
                "buildingCount6Months": combined_data["building_count_6_months"],
                "highwayLength6Months": combined_data["highway_length_6_months"],
            },
            "meta": {
                "indicators": "https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md",
                "metrics": "https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md",
            },
        }

        return return_stats
# --- src/config.py ---

### Polygon statistics which will deliver the stats of approx buildings/ roads in the area

# Environment variables are always strings, so a value such as "false" or "0"
# would be truthy if used directly. Parse it with the same vocabulary
# ConfigParser.getboolean accepts; fall back to the config file when unset.
_polygon_stats_flag = os.environ.get("ENABLE_POLYGON_STATISTICS_ENDPOINTS")
ENABLE_POLYGON_STATISTICS_ENDPOINTS = (
    _polygon_stats_flag.strip().lower() in ("1", "yes", "true", "on")
    if _polygon_stats_flag
    else config.getboolean(
        "API_CONFIG", "ENABLE_POLYGON_STATISTICS_ENDPOINTS", fallback=False
    )
)
POLYGON_STATISTICS_API_URL = os.environ.get("POLYGON_STATISTICS_API_URL") or config.get(
    "API_CONFIG", "POLYGON_STATISTICS_API_URL", fallback=None
)

# NOTE(review): this is a str when it comes from the environment or the config
# file and the int 5 only when the fallback is used -- confirm consumers accept both.
POLYGON_STATISTICS_API_RATE_LIMIT = os.environ.get(
    "POLYGON_STATISTICS_API_RATE_LIMIT"
) or config.get("API_CONFIG", "POLYGON_STATISTICS_API_RATE_LIMIT", fallback=5)


# --- src/query_builder/builder.py ---


def generate_polygon_stats_graphql_query(geojson_feature):
    """
    Generates the graphql query for the statistics

    Args:
        geojson_feature: Value that is serialised with ``dumps`` and
            substituted for the ``polygon`` argument of the request.

    Returns:
        str: GraphQL query asking ``polygonStatistic`` for the ``id`` and
        ``result`` of each analytics function listed below.
    """
    query = """
    {
      polygonStatistic (
        polygonStatisticRequest: {
          polygon: %s
        }
      )
      {
        analytics {
          functions(args:[
            {name:"sumX", id:"population", x:"population"},
            {name:"sumX", id:"populatedAreaKm2", x:"populated_area_km2"},
            {name:"percentageXWhereNoY", id:"osmBuildingGapsPercentage", x:"populated_area_km2", y:"building_count"},
            {name:"percentageXWhereNoY", id:"osmRoadGapsPercentage", x:"populated_area_km2", y:"highway_length"},
            {name:"percentageXWhereNoY", id:"antiqueOsmBuildingsPercentage", x:"populated_area_km2", y:"building_count_6_months"},
            {name:"percentageXWhereNoY", id:"antiqueOsmRoadsPercentage", x:"populated_area_km2", y:"highway_length_6_months"},
            {name:"avgX", id:"averageEditTime", x:"avgmax_ts"},
            {name:"maxX", id:"lastEditTime", x:"avgmax_ts"},
            {name:"sumX", id:"osmBuildingsCount", x:"building_count"},
            {name:"sumX", id:"highway_length", x:"highway_length"},
            {name:"sumX", id:"osmUsersCount", x:"osm_users"},
            {name:"sumX", id:"building_count_6_months" , x:"building_count_6_months"},
            {name:"sumX", id:"highway_length_6_months", x:"highway_length_6_months"},
            {name:"sumX", id:"aiBuildingsCountEstimation", x:"total_building_count"},
            {name:"sumX", id:"aiRoadCountEstimation", x:"total_road_length"}
          ]) {
            id,
            result
          }
        }
      }
    }
    """
    # The template holds exactly one %s placeholder: the serialised polygon.
    query = query % dumps(geojson_feature)

    return query
class StatsRequestParams(BaseModel):
    """Request body holding the polygon or multipolygon to compute statistics for."""

    geometry: Union[Polygon, MultiPolygon] = Field(
        example={
            "type": "Polygon",
            "coordinates": [
                [
                    [83.96919250488281, 28.194446860487773],
                    [83.99751663208006, 28.194446860487773],
                    [83.99751663208006, 28.214869548073377],
                    [83.96919250488281, 28.214869548073377],
                    [83.96919250488281, 28.194446860487773],
                ]
            ],
        },
    )

    @validator("geometry", allow_reuse=True)
    def get_value_as_feature(cls, value):
        """Converts geometry to geojson feature"""
        # Function-scope import so this class does not depend on a
        # module-level ``json`` import being present.
        import json

        # value.json() yields a JSON *string*; decode it so the Feature
        # carries a geometry object rather than an embedded string.
        return {
            "type": "Feature",
            "geometry": json.loads(value.json()),
            "properties": {},
        }