From 1afcd3026d516bf1c8c55126d875ef2b56f14894 Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Fri, 15 Sep 2023 00:53:43 +0700
Subject: [PATCH 01/14] Added pmtiles support

---
 API/api_worker.py        | 12 +++++++++---
 src/app.py               | 37 ++++++++++++++++++++++++++++++-------
 src/validation/models.py |  1 +
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/API/api_worker.py b/API/api_worker.py
index e1f392d3..993870f8 100644
--- a/API/api_worker.py
+++ b/API/api_worker.py
@@ -36,6 +36,9 @@ def process_raw_data(self, params):
         if params.output_type
         else RawDataOutputType.GEOJSON.value
     )
+    if params.output_type == RawDataOutputType.PMTILES.value:
+        logging.debug("Using ST_Within logic")
+        params.use_st_within = True
     params.file_name = (
         format_file_name_str(params.file_name) if params.file_name else "Export"
     )
@@ -65,9 +68,12 @@ def process_raw_data(self, params):
             logging.debug("Zip Binding Done !")
         else:
             for file_path in pathlib.Path(working_dir).iterdir():
-                upload_file_path = file_path
-                inside_file_size += os.path.getsize(file_path)
-                break  # only take one file inside dir , if contains many it should be inside zip
+                if file_path.is_file() and file_path.name.endswith(
+                    params.output_type.lower()
+                ):
+                    upload_file_path = file_path
+                    inside_file_size += os.path.getsize(file_path)
+                    break  # take only the first matching file; exports with multiple files should be zipped
         # check if download url will be generated from s3 or not from config
         if use_s3_to_upload:
             file_transfer_obj = S3FileTransfer()
diff --git a/src/app.py b/src/app.py
index cc221512..610069a1 100644
--- a/src/app.py
+++ b/src/app.py
@@ -469,12 +469,14 @@ def get_grid_id(geom, cur):
         )
 
     @staticmethod
-    def to_geojson_raw(results):
-        """Responsible for geojson writing"""
-        features = [orjson.loads(row[0]) for row in results]
-        feature_collection = FeatureCollection(features=features)
-
-        return feature_collection
+    def geojson2tiles(geojson_path, tile_path, tile_layer_name):
+        """Converts a GeoJSON dump to tiles using tippecanoe"""
+        cmd = """tippecanoe -zg --projection=EPSG:4326 -o {tile_output_path} -l {tile_layer_name} {geojson_input_path} --force""".format(
+            tile_output_path=tile_path,
+            tile_layer_name=tile_layer_name,
+            geojson_input_path=geojson_path,
+        )
+        run_ogr2ogr_cmd(cmd)  # reused here as a generic command runner to execute tippecanoe
 
     def extract_current_data(self, exportname):
         """Responsible for extracting the raw data current snapshot: initially it creates a geojson file, generates the query, runs it with a 1000-row chunk size, writes directly to the geojson file, and closes the file after the dump
@@ -500,6 +502,7 @@ def extract_current_data(self, exportname):
             # Create an exports directory because it does not exist
             os.makedirs(working_dir)
         # create file path with respect to output type
+
         dump_temp_file_path = os.path.join(
             working_dir,
             f"{self.params.file_name if self.params.file_name else 'Export'}.{output_type.lower()}",
@@ -518,6 +521,25 @@ def extract_current_data(self, exportname):
                     ),
                     dump_temp_file_path,
                 )  # uses own conversion class
+            elif output_type == RawDataOutputType.PMTILES.value:
+                geojson_path = os.path.join(
+                    working_dir,
+                    f"{self.params.file_name if self.params.file_name else 'Export'}.geojson",
+                )
+                RawData.query2geojson(
+                    self.con,
+                    raw_currentdata_extraction_query(
+                        self.params,
+                        g_id=grid_id,
+                        c_id=country,
+                        geometry_dump=geometry_dump,
+                        country_export=country_export,
+                    ),
+                    geojson_path,
+                )
+                RawData.geojson2tiles(
+                    geojson_path, dump_temp_file_path, self.params.file_name
+                )
             elif output_type == RawDataOutputType.SHAPEFILE.value:
                 (
                     point_query,
@@ -686,11 +708,12 @@ def 
upload(self, file_path, file_name, file_suffix="zip"): sample function call : S3FileTransfer.transfer(file_path="exports",file_prefix="upload_test")""" file_name = f"{file_name}.{file_suffix}" + logging.debug("Started Uploading %s from %s", file_name, file_path) # instantiate upload start_time = time.time() try: - self.s_3.upload_file(file_path, BUCKET_NAME, file_name) + self.s_3.upload_file(str(file_path), BUCKET_NAME, str(file_name)) except Exception as ex: logging.error(ex) raise ex diff --git a/src/validation/models.py b/src/validation/models.py index adbe06db..f1135d72 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -54,6 +54,7 @@ class RawDataOutputType(Enum): GEOPACKAGE = "gpkg" PGDUMP = "sql" CSV = "csv" + PMTILES = "pmtiles" ## EXPERIMENTAL class SupportedFilters(Enum): From 8a0746f7c91b661f3828177ee71bab1b359cc5f8 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 17 Sep 2023 09:37:38 +0700 Subject: [PATCH 02/14] Make min zoom and max zoom optional for mbtiles --- src/app.py | 30 ++++++++++++++++++++---------- src/validation/models.py | 22 ++++------------------ 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/app.py b/src/app.py index 610069a1..229fd999 100644 --- a/src/app.py +++ b/src/app.py @@ -322,16 +322,26 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params): file.write(query) # for mbtiles we need additional input as well i.e. minzoom and maxzoom , setting default at max=22 and min=10 if outputtype == RawDataOutputType.MBTILES.value: - cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( - min_zoom=params.min_zoom, - max_zoom=params.max_zoom, - export_path=dump_temp_path, - host=db_items.get("host"), - username=db_items.get("user"), - db=db_items.get("dbname"), - password=db_items.get("password"), - pg_sql_select=query_path, - ) + if params.min_zooom and params.max_zoom: + cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( + min_zoom=params.min_zoom, + max_zoom=params.max_zoom, + export_path=dump_temp_path, + host=db_items.get("host"), + username=db_items.get("user"), + db=db_items.get("dbname"), + password=db_items.get("password"), + pg_sql_select=query_path, + ) + else: + cmd = """ogr2ogr -overwrite -f MBTILES -dsco ZOOM_LEVEL_AUTO=YES {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( + export_path=dump_temp_path, + host=db_items.get("host"), + username=db_items.get("user"), + db=db_items.get("dbname"), + password=db_items.get("password"), + pg_sql_select=query_path, + ) run_ogr2ogr_cmd(cmd) if outputtype == RawDataOutputType.FLATGEOBUF.value: diff --git a/src/validation/models.py b/src/validation/models.py index f1135d72..c5894b06 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -184,20 +184,6 @@ def check_bind_option(cls, value, values): ) return value - @validator("output_type", allow_reuse=True) - def check_output_type(cls, value, values): - """Checks mbtiles required field""" - if value == RawDataOutputType.MBTILES.value: - if values.get("min_zoom") and values.get("max_zoom"): - if values.get("min_zoom") < 0 or 
values.get("max_zoom") > 22: - raise ValueError("Zoom range should range from 0-22") - return value - else: - raise ValueError( - "Field min_zoom and max_zoom must be supplied for mbtiles output type" - ) - return value - @validator("geometry", always=True) def check_geometry_area(cls, value, values): """Validates geom area_m2""" @@ -207,10 +193,10 @@ def check_geometry_area(cls, value, values): RAWDATA_CURRENT_POLYGON_AREA = int(EXPORT_MAX_AREA_SQKM) output_type = values.get("output_type") - if output_type: - # for mbtiles ogr2ogr does very worst job when area gets bigger we should write owr own or find better approach for larger area - if output_type == RawDataOutputType.MBTILES.value: - RAWDATA_CURRENT_POLYGON_AREA = 2 # we need to figure out how much tile we are generating before passing request on the basis of bounding box we can restrict user , right now relation contains whole country for now restricted to this area but can not query relation will take ages because that will intersect with country boundary : need to clip it + # if output_type: + # # for mbtiles ogr2ogr does very worst job when area gets bigger we should write owr own or find better approach for larger area + # if output_type == RawDataOutputType.MBTILES.value: + # RAWDATA_CURRENT_POLYGON_AREA = 2 # we need to figure out how much tile we are generating before passing request on the basis of bounding box we can restrict user , right now relation contains whole country for now restricted to this area but can not query relation will take ages because that will intersect with country boundary : need to clip it if area_km2 > RAWDATA_CURRENT_POLYGON_AREA: raise ValueError( f"""Polygon Area {int(area_km2)} Sq.KM is higher than Threshold : {RAWDATA_CURRENT_POLYGON_AREA} Sq.KM for {output_type}""" From 1738bdc4ce166b0bc7bc7f49d2a9a94534b69b69 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 17 Sep 2023 10:40:37 +0700 Subject: [PATCH 03/14] Add snapshot plain --- API/api_worker.py | 5 +++- API/raw_data.py | 25 +++++++++++++++- src/app.py | 22 +------------- src/query_builder/builder.py | 5 ++-- src/validation/models.py | 56 ++++++++++++++++++++---------------- 5 files changed, 63 insertions(+), 50 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index 993870f8..b06015c1 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -36,7 +36,10 @@ def process_raw_data(self, params): if params.output_type else RawDataOutputType.GEOJSON.value ) - if params.output_type == RawDataOutputType.PMTILES.value: + if ( + params.output_type == RawDataOutputType.PMTILES.value + or params.output_type == RawDataOutputType.MBTILES.value + ): logging.debug("Using STwithin Logic") params.use_st_within = True params.file_name = ( diff --git a/API/raw_data.py b/API/raw_data.py index e4e3cf1d..3b3cbdfe 100644 --- a/API/raw_data.py +++ b/API/raw_data.py @@ -33,7 +33,12 @@ from src.config import LIMITER as limiter from src.config import RATE_LIMIT_PER_MIN as export_rate_limit from src.config import logger as logging -from src.validation.models import RawDataCurrentParams, SnapshotResponse, StatusResponse +from src.validation.models import ( + RawDataCurrentParams, + RawDataCurrentParamsBase, + SnapshotResponse, + StatusResponse, +) from .api_worker import process_raw_data @@ -434,6 +439,24 @@ def get_osm_current_snapshot_as_file( return JSONResponse({"task_id": task.id, "track_link": f"/tasks/status/{task.id}/"}) +@router.get("/snapshot/plain/", response_model=FeatureCollection) +@version(1) +def 
get_osm_current_snapshot_as_plain_geojson(
+    request: Request, params: RawDataCurrentParamsBase
+):
+    """Generates plain GeoJSON for a polygon within 100 sq. km and returns the result right away
+
+    Args:
+        request (Request): incoming request object
+        params (RawDataCurrentParamsBase): Same as /snapshot except without the multiple output format options and export configurations
+
+    Returns:
+        FeatureCollection: GeoJSON
+    """
+    result = RawData(params).extract_plain_geojson()
+    return result
+
+
 @router.get("/countries/", response_model=FeatureCollection)
 @version(1)
 def get_countries(q: str = ""):
diff --git a/src/app.py b/src/app.py
index 229fd999..1cd5f792 100644
--- a/src/app.py
+++ b/src/app.py
@@ -526,7 +526,6 @@ def extract_current_data(self, exportname):
                         self.params,
                         g_id=grid_id,
                         c_id=country,
-                        geometry_dump=geometry_dump,
                         country_export=country_export,
                     ),
                     dump_temp_file_path,
@@ -542,7 +541,6 @@ def extract_current_data(self, exportname):
                         self.params,
                         g_id=grid_id,
                         c_id=country,
-                        geometry_dump=geometry_dump,
                         country_export=country_export,
                     ),
                     geojson_path,
@@ -580,7 +578,6 @@ def extract_current_data(self, exportname):
                         self.params,
                         grid_id,
                         country,
-                        geometry_dump,
                         ogr_export=True,
                         country_export=country_export,
                     ),
@@ -645,24 +642,7 @@ def get_osm_feature(self, osm_id):
 
     def extract_plain_geojson(self):
         """Gets geojson for small area : Performs direct query with/without geometry"""
-        query = raw_extract_plain_geojson(self.params, inspect_only=True)
-        self.cur.execute(query)
-        analyze_fetched = self.cur.fetchall()
-        rows = list(
-            filter(lambda x: x.startswith("rows"), analyze_fetched[0][0].split())
-        )
-        approx_returned_rows = rows[0].split("=")[1]
-        logging.debug("Approximated query output : %s", approx_returned_rows)
-
-        if int(approx_returned_rows) > 500:
-            self.cur.close()
-            RawData.close_con(self.con)
-            raise HTTPException(
-                status_code=500,
-                detail=f"Query returned {approx_returned_rows} rows (This endpoint supports upto 1000) , Use /current-snapshot/ for larger extraction",
-            )
-
-        extraction_query = raw_extract_plain_geojson(self.params)
+        extraction_query = raw_currentdata_extraction_query(self.params)
         features = []
 
         with self.con.cursor(
diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py
index 056c840b..a690437d 100644
--- a/src/query_builder/builder.py
+++ b/src/query_builder/builder.py
@@ -461,9 +461,8 @@ def get_country_geojson(c_id):
 
 def raw_currentdata_extraction_query(
     params,
-    g_id,
-    c_id,
-    geometry_dump,
+    g_id=None,
+    c_id=None,
     ogr_export=False,
     select_all=False,
     country_export=False,
diff --git a/src/validation/models.py b/src/validation/models.py
index c5894b06..2c140937 100644
--- a/src/validation/models.py
+++ b/src/validation/models.py
@@ -124,17 +124,7 @@ class Filters(BaseModel):
     attributes: Optional[AttributeFilter]
 
 
-class RawDataCurrentParams(BaseModel):
-    output_type: Optional[RawDataOutputType] = Field(
-        default=RawDataOutputType.GEOJSON.value, example="geojson"
-    )
-    min_zoom: Optional[int] = Field(
-        default=None, description="Only for mbtiles"
-    )  # only for if mbtiles is output
-    max_zoom: Optional[int] = Field(
-        default=None, description="Only for mbtiles"
-    )  # only for if mbtiles is output
-    file_name: Optional[str] = Field(default=None, example="My test export")
+class RawDataCurrentParamsBase(BaseModel):
     geometry_type: Optional[List[SupportedGeometryFilters]] = Field(
         default=None, example=["point", "polygon"]
     )
     centroid: Optional[bool] = Field(
         default=False, description="Exports centroid of features as geom"
     )
     use_st_within: Optional[bool] = Field(
-        default=False,
+        default=True,
         description="Exports only features that lie entirely inside the passed polygons (ST_Within); by default, features that merely intersect the passed polygon are exported",
     )
     filters: Optional[Filters] = Field(
         default=None,
         example={
             "tags": {"all_geometry": {"join_or": {"building": []}}},
             "attributes": {"all_geometry": ["name"]},
         },
     )
+
+    @validator("geometry", always=True)
+    def check_geometry_area(cls, value, values):
+        """Validates geom area_m2"""
+        area_m2 = area(json.loads(value.json()))
+        area_km2 = area_m2 * 1e-6
+        RAWDATA_CURRENT_POLYGON_AREA = 100  # hard limit for the plain endpoint, in sq. km
+        if area_km2 > RAWDATA_CURRENT_POLYGON_AREA:
+            raise ValueError(
+                f"""Polygon Area {int(area_km2)} Sq.KM is higher than Threshold : {RAWDATA_CURRENT_POLYGON_AREA} Sq.KM"""
+            )
+        return value
+
+    @validator("geometry_type", allow_reuse=True)
+    def return_unique_value(cls, value):
+        """return unique list"""
+        return list(set(value))
+
+
+class RawDataCurrentParams(RawDataCurrentParamsBase):
+    output_type: Optional[RawDataOutputType] = Field(
+        default=RawDataOutputType.GEOJSON.value, example="geojson"
+    )
+    min_zoom: Optional[int] = Field(
+        default=None, description="Only for mbtiles"
+    )  # only for if mbtiles is output
+    max_zoom: Optional[int] = Field(
+        default=None, description="Only for mbtiles"
+    )  # only for if mbtiles is output
+    file_name: Optional[str] = Field(default=None, example="My test export")
     uuid: Optional[bool] = Field(
         default=True,
         description="Attaches uid to exports by default , Only disable this if it is recurring export",
@@ -189,25 +209,14 @@ def check_geometry_area(cls, value, values):
         """Validates geom area_m2"""
         area_m2 = area(json.loads(value.json()))
         area_km2 = area_m2 * 1e-6
         RAWDATA_CURRENT_POLYGON_AREA = int(EXPORT_MAX_AREA_SQKM)
-
         output_type = values.get("output_type")
-        # if output_type:
-        # # for mbtiles ogr2ogr does very worst job when area gets bigger we should write owr own or find better approach for larger area
-        # if output_type == RawDataOutputType.MBTILES.value:
-        # RAWDATA_CURRENT_POLYGON_AREA = 2 # we need to figure out how much tile we are generating before passing request on the basis of bounding box we can restrict user , right now relation contains whole country for now restricted to this area but can not query relation will take ages because that will intersect with country boundary : need to clip it
         if area_km2 > RAWDATA_CURRENT_POLYGON_AREA:
             raise ValueError(
                 f"""Polygon Area {int(area_km2)} Sq.KM is higher than Threshold : {RAWDATA_CURRENT_POLYGON_AREA} Sq.KM for {output_type}"""
             )
         return value
 
-    @validator("geometry_type", allow_reuse=True)
-    def return_unique_value(cls, value):
-        """return unique list"""
-        return list(set(value))
-
 
 class WhereCondition(TypedDict):
     key: str

From e63bf42967ecf942dcc1b12a1641eb596b7b0bde Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Sun, 17 Sep 2023 11:49:10 +0700
Subject: [PATCH 04/14] Fixed tests using geometry dump

---
 tests/test_app.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_app.py b/tests/test_app.py
index 585d9898..9b9238eb 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -69,7 +69,6 @@ def test_rawdata_current_snapshot_geometry_query():
         validated_params,
         g_id=None,
         c_id=None,
-        geometry_dump=dumps(dict(validated_params.geometry)),
     )
     assert query_result.encode("utf-8") == expected_query.encode("utf-8")
 
@@ -116,7 +115,6 @@ def test_rawdata_current_snapshot_normal_query():
         validated_params,
         g_id=None,
         c_id=None,
-        geometry_dump=dumps(dict(validated_params.geometry)),
) assert query_result.encode("utf-8") == expected_query.encode("utf-8") @@ -168,7 +166,6 @@ def test_attribute_filter_rawdata(): validated_params, g_id=[[1187], [1188]], c_id=None, - geometry_dump=dumps(dict(validated_params.geometry)), ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") @@ -231,6 +228,5 @@ def test_and_filters(): validated_params, g_id=None, c_id=None, - geometry_dump=dumps(dict(validated_params.geometry)), ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") From a1204fa32b8e87417a69c65ecfa5ac3a421150cc Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Mon, 2 Oct 2023 17:18:26 +0545 Subject: [PATCH 05/14] remove gid cid --- tests/test_app.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 9b9238eb..5807899e 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -67,8 +67,6 @@ def test_rawdata_current_snapshot_geometry_query(): ST_intersects(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t3""" query_result = raw_currentdata_extraction_query( validated_params, - g_id=None, - c_id=None, ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") @@ -113,8 +111,6 @@ def test_rawdata_current_snapshot_normal_query(): ST_intersects(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t3""" query_result = raw_currentdata_extraction_query( validated_params, - g_id=None, - c_id=None, ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") @@ -165,7 +161,6 @@ def test_attribute_filter_rawdata(): query_result = raw_currentdata_extraction_query( validated_params, g_id=[[1187], [1188]], - c_id=None, ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") @@ -226,7 +221,5 @@ def test_and_filters(): ST_intersects(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[36.70588085657477, 37.1979648807274], [36.70588085657477, 37.1651408422983], [36.759267544807194, 37.1651408422983], [36.759267544807194, 37.1979648807274], [36.70588085657477, 37.1979648807274]]], "type": "Polygon"}')) and (tags ->> 'destroyed:building' = 'yes' AND tags ->> 'damage:date' = '2023-02-06') and (geometrytype(geom)='POLYGON' or geometrytype(geom)='MULTIPOLYGON')) t1""" query_result = raw_currentdata_extraction_query( validated_params, - g_id=None, - c_id=None, ) assert query_result.encode("utf-8") == expected_query.encode("utf-8") From e4cd9b33432966cb8f6040436daa8d8f4fa651f9 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Mon, 9 Oct 2023 22:52:59 +0545 Subject: [PATCH 06/14] Mostly this should fix tests that compares intersects query --- tests/test_app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_app.py b/tests/test_app.py index 5807899e..05e3d1fe 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -38,6 +38,7 @@ def test_rawdata_current_snapshot_geometry_query(): ], }, "outputType": "geojson", + "useStWithin": False, "filters": { "tags": {"point": {"join_or": {"amenity": ["shop", "toilet"]}}}, "attributes": {"point": ["name"]}, @@ -85,6 +86,7 @@ def test_rawdata_current_snapshot_normal_query(): ] ], }, + "useStWithin": False, "outputType": 
"geojson", } validated_params = RawDataCurrentParams(**test_param) @@ -130,6 +132,7 @@ def test_attribute_filter_rawdata(): ], }, "outputType": "geojson", + "useStWithin": False, "geometryType": ["polygon", "line"], "filters": { "attributes": {"line": ["name"]}, @@ -181,6 +184,7 @@ def test_and_filters(): ], }, "outputType": "geojson", + "useStWithin": False, "geometryType": ["polygon"], "filters": { "tags": { From 02d550362b2cca9b74c14f15148f7b3bd2a55e89 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Mon, 9 Oct 2023 22:58:47 +0545 Subject: [PATCH 07/14] Add test cases for st_within --- tests/test_app.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_app.py b/tests/test_app.py index 05e3d1fe..c16a102b 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -117,6 +117,50 @@ def test_rawdata_current_snapshot_normal_query(): assert query_result.encode("utf-8") == expected_query.encode("utf-8") +def test_rawdata_current_snapshot_normal_query_ST_within(): + test_param = { + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [84.92431640625, 27.766190642387496], + [85.31982421875, 27.766190642387496], + [85.31982421875, 28.02592458049937], + [84.92431640625, 28.02592458049937], + [84.92431640625, 27.766190642387496], + ] + ], + }, + "outputType": "geojson", + } + validated_params = RawDataCurrentParams(**test_param) + expected_query = """select ST_AsGeoJSON(t0.*) from (select + osm_id ,version,tags,changeset,timestamp,geom + from + nodes + where + ST_within(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t0 UNION ALL select ST_AsGeoJSON(t1.*) from (select + osm_id ,version,tags,changeset,timestamp,geom + from + ways_line + where + ST_within(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t1 UNION ALL select ST_AsGeoJSON(t2.*) from (select + osm_id ,version,tags,changeset,timestamp,geom + from + ways_poly + where + ST_within(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t2 UNION ALL select ST_AsGeoJSON(t3.*) from (select + osm_id ,version,tags,changeset,timestamp,geom + from + relations + where + ST_within(geom,ST_GEOMFROMGEOJSON('{"coordinates": [[[84.92431640625, 27.766190642387496], [85.31982421875, 27.766190642387496], [85.31982421875, 28.02592458049937], [84.92431640625, 28.02592458049937], [84.92431640625, 27.766190642387496]]], "type": "Polygon"}'))) t3""" + query_result = raw_currentdata_extraction_query( + validated_params, + ) + assert query_result.encode("utf-8") == expected_query.encode("utf-8") + + def test_attribute_filter_rawdata(): test_param = { "geometry": { From 876d891bfc457b0ef884ba14b598028771756b57 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Mon, 9 Oct 2023 23:18:12 +0545 Subject: [PATCH 08/14] Fix typo on min zoom --- src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 1cd5f792..c6b95ab1 100644 --- a/src/app.py +++ 
b/src/app.py
@@ -322,7 +322,7 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
         file.write(query)
     # for mbtiles we need additional input as well i.e. minzoom and maxzoom , setting default at max=22 and min=10
     if outputtype == RawDataOutputType.MBTILES.value:
-        if params.min_zooom and params.max_zoom:
+        if params.min_zoom and params.max_zoom:
             cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
                 min_zoom=params.min_zoom,
                 max_zoom=params.max_zoom,

From 3dc9ffc29cab9a7a574c04c7a9b6aace21c5d8fa Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Tue, 10 Oct 2023 13:43:52 +0545
Subject: [PATCH 09/14] Made pmtiles and mbtiles support optional

---
 README.md                               |  5 +++++
 docs/src/installation/configurations.md |  2 ++
 src/config.py                           |  4 ++++
 src/validation/models.py                | 20 +++++++++++---------
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e08a1296..69dd9bfa 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,8 @@
   | GeoPackage | :heavy_check_mark: |
   | PGDUMP | :heavy_check_mark: |
   | GeoJSON | :heavy_check_mark: |
+  | PMTiles | :heavy_check_mark: |
+  | MBTiles | :heavy_check_mark: |
 
 ## Installation
 
@@ -90,6 +92,9 @@ Setup the necessary configurations for Raw Data API from [configurations](./docs
 
 Setup config.txt in project root.
 
+## Optional: Tiles Output
+If you opt for tiles output, set ```ENABLE_TILES : True``` in your env variables and make sure [Tippecanoe](https://github.com/mapbox/tippecanoe) is installed
+
 ### Start the Server
 
 ```
diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md
index ebb39782..20a05ac4 100644
--- a/docs/src/installation/configurations.md
+++ b/docs/src/installation/configurations.md
@@ -41,6 +41,7 @@ The following are the different configuration options that are accepted.
 | `EXPORT_MAX_AREA_SQKM` | `EXPORT_MAX_AREA_SQKM` | `[API_CONFIG]` | `100000` | max area in sq. km. to support for rawdata input | OPTIONAL |
 | `USE_CONNECTION_POOLING` | `USE_CONNECTION_POOLING` | `[API_CONFIG]` | `false` | Enable psycopg2 connection pooling | OPTIONAL |
 | `ALLOW_BIND_ZIP_FILTER` | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | `true` | Enable zip compression for exports | OPTIONAL |
+| `ENABLE_TILES` | `ENABLE_TILES` | `[API_CONFIG]` | `false` | Enable tiles output (PMTiles and MBTiles) | OPTIONAL |
 | `INDEX_THRESHOLD` | `INDEX_THRESHOLD` | `[API_CONFIG]` | `5000` | Area in sqkm to apply grid/country index filter | OPTIONAL |
 | `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL |
 | `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the result backend | OPTIONAL |
@@ -72,6 +73,7 @@ The following are the different configuration options that are accepted.
| `EXPORT_PATH` | `[API_CONFIG]` | Yes | Yes | | `EXPORT_MAX_AREA_SQKM` | `[API_CONFIG]` | Yes | No | | `USE_CONNECTION_POOLING` | `[API_CONFIG]` | Yes | Yes | +| `ENABLE_TILES` | `[API_CONFIG]` | Yes | No | | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | Yes | Yes | | `INDEX_THRESHOLD` | `[API_CONFIG]` | No | Yes | | `CELERY_BROKER_URL` | TBD | Yes | Yes | diff --git a/src/config.py b/src/config.py index 58b67081..a7b4b621 100644 --- a/src/config.py +++ b/src/config.py @@ -72,6 +72,10 @@ "API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=None ) +ENABLE_TILES = os.environ.get("ENABLE_TILES") or config.get( + "API_CONFIG", "ENABLE_TILES", fallback=None +) + #################### ### EXPORT_UPLOAD CONFIG BLOCK diff --git a/src/validation/models.py b/src/validation/models.py index 2c140937..842f4f02 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -28,7 +28,7 @@ from pydantic import Field, validator from typing_extensions import TypedDict -from src.config import ALLOW_BIND_ZIP_FILTER, EXPORT_MAX_AREA_SQKM +from src.config import ALLOW_BIND_ZIP_FILTER, ENABLE_TILES, EXPORT_MAX_AREA_SQKM def to_camel(string: str) -> str: @@ -50,11 +50,12 @@ class RawDataOutputType(Enum): KML = "kml" SHAPEFILE = "shp" FLATGEOBUF = "fgb" - MBTILES = "mbtiles" # fully experimental for now GEOPACKAGE = "gpkg" PGDUMP = "sql" CSV = "csv" - PMTILES = "pmtiles" ## EXPERIMENTAL + if ENABLE_TILES: + MBTILES = "mbtiles" + PMTILES = "pmtiles" ## EXPERIMENTAL class SupportedFilters(Enum): @@ -181,12 +182,13 @@ class RawDataCurrentParams(RawDataCurrentParamsBase): output_type: Optional[RawDataOutputType] = Field( default=RawDataOutputType.GEOJSON.value, example="geojson" ) - min_zoom: Optional[int] = Field( - default=None, description="Only for mbtiles" - ) # only for if mbtiles is output - max_zoom: Optional[int] = Field( - default=None, description="Only for mbtiles" - ) # only for if mbtiles is output + if ENABLE_TILES: + min_zoom: Optional[int] = Field( + default=None, description="Only for mbtiles" + ) # only for if mbtiles is output + max_zoom: Optional[int] = Field( + default=None, description="Only for mbtiles" + ) # only for if mbtiles is output file_name: Optional[str] = Field(default=None, example="My test export") uuid: Optional[bool] = Field( default=True, From 412133042d982b5a00fcf9cdb8a92788a3e2d4b0 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 10 Oct 2023 14:44:02 +0545 Subject: [PATCH 10/14] Added enable tiles support for app --- API/api_worker.py | 14 +-- docs/src/installation/configurations.md | 2 +- src/app.py | 108 ++++++++++++++---------- 3 files changed, 74 insertions(+), 50 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index b06015c1..ea6a97bf 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -12,6 +12,7 @@ from src.config import ALLOW_BIND_ZIP_FILTER from src.config import CELERY_BROKER_URL as celery_broker_uri from src.config import CELERY_RESULT_BACKEND as celery_backend +from src.config import ENABLE_TILES from src.config import USE_S3_TO_UPLOAD as use_s3_to_upload from src.config import logger as logging from src.query_builder.builder import format_file_name_str @@ -36,12 +37,13 @@ def process_raw_data(self, params): if params.output_type else RawDataOutputType.GEOJSON.value ) - if ( - params.output_type == RawDataOutputType.PMTILES.value - or params.output_type == RawDataOutputType.MBTILES.value - ): - logging.debug("Using STwithin Logic") - params.use_st_within = True + if ENABLE_TILES: + if ( + params.output_type == 
RawDataOutputType.PMTILES.value + or params.output_type == RawDataOutputType.MBTILES.value + ): + logging.debug("Using STwithin Logic") + params.use_st_within = True params.file_name = ( format_file_name_str(params.file_name) if params.file_name else "Export" ) diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md index 20a05ac4..3aa4dd67 100644 --- a/docs/src/installation/configurations.md +++ b/docs/src/installation/configurations.md @@ -73,7 +73,7 @@ The following are the different configuration options that are accepted. | `EXPORT_PATH` | `[API_CONFIG]` | Yes | Yes | | `EXPORT_MAX_AREA_SQKM` | `[API_CONFIG]` | Yes | No | | `USE_CONNECTION_POOLING` | `[API_CONFIG]` | Yes | Yes | -| `ENABLE_TILES` | `[API_CONFIG]` | Yes | No | +| `ENABLE_TILES` | `[API_CONFIG]` | Yes | Yes | | `ALLOW_BIND_ZIP_FILTER` | `[API_CONFIG]` | Yes | Yes | | `INDEX_THRESHOLD` | `[API_CONFIG]` | No | Yes | | `CELERY_BROKER_URL` | TBD | Yes | Yes | diff --git a/src/app.py b/src/app.py index c6b95ab1..ba0457dc 100644 --- a/src/app.py +++ b/src/app.py @@ -34,7 +34,12 @@ from psycopg2 import OperationalError, connect from psycopg2.extras import DictCursor -from src.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME +from src.config import ( + AWS_ACCESS_KEY_ID, + AWS_SECRET_ACCESS_KEY, + BUCKET_NAME, + ENABLE_TILES, +) from src.config import EXPORT_PATH as export_path from src.config import INDEX_THRESHOLD as index_threshold from src.config import USE_CONNECTION_POOLING as use_connection_pooling @@ -321,28 +326,29 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params): with open(query_path, "w", encoding="UTF-8") as file: file.write(query) # for mbtiles we need additional input as well i.e. minzoom and maxzoom , setting default at max=22 and min=10 - if outputtype == RawDataOutputType.MBTILES.value: - if params.min_zoom and params.max_zoom: - cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( - min_zoom=params.min_zoom, - max_zoom=params.max_zoom, - export_path=dump_temp_path, - host=db_items.get("host"), - username=db_items.get("user"), - db=db_items.get("dbname"), - password=db_items.get("password"), - pg_sql_select=query_path, - ) - else: - cmd = """ogr2ogr -overwrite -f MBTILES -dsco ZOOM_LEVEL_AUTO=YES {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( - export_path=dump_temp_path, - host=db_items.get("host"), - username=db_items.get("user"), - db=db_items.get("dbname"), - password=db_items.get("password"), - pg_sql_select=query_path, - ) - run_ogr2ogr_cmd(cmd) + if ENABLE_TILES: + if outputtype == RawDataOutputType.MBTILES.value: + if params.min_zoom and params.max_zoom: + cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( + min_zoom=params.min_zoom, + max_zoom=params.max_zoom, + export_path=dump_temp_path, + host=db_items.get("host"), + username=db_items.get("user"), + db=db_items.get("dbname"), + password=db_items.get("password"), + pg_sql_select=query_path, + ) + else: + cmd = """ogr2ogr -overwrite -f MBTILES -dsco ZOOM_LEVEL_AUTO=YES {export_path} PG:"host={host} user={username} 
dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format( + export_path=dump_temp_path, + host=db_items.get("host"), + username=db_items.get("user"), + db=db_items.get("dbname"), + password=db_items.get("password"), + pg_sql_select=query_path, + ) + run_ogr2ogr_cmd(cmd) if outputtype == RawDataOutputType.FLATGEOBUF.value: cmd = """ogr2ogr -overwrite -f FLATGEOBUF {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress VERIFY_BUFFERS=NO""".format( @@ -519,6 +525,40 @@ def extract_current_data(self, exportname): ) try: # currently we have only geojson binding function written other than that we have depend on ogr + if ENABLE_TILES: + if output_type == RawDataOutputType.PMTILES.value: + geojson_path = os.path.join( + working_dir, + f"{self.params.file_name if self.params.file_name else 'Export'}.geojson", + ) + RawData.query2geojson( + self.con, + raw_currentdata_extraction_query( + self.params, + g_id=grid_id, + c_id=country, + country_export=country_export, + ), + geojson_path, + ) + RawData.geojson2tiles( + geojson_path, dump_temp_file_path, self.params.file_name + ) + if output_type == RawDataOutputType.MBTILES.value: + RawData.ogr_export( + query=raw_currentdata_extraction_query( + self.params, + grid_id, + country, + ogr_export=True, + country_export=country_export, + ), + outputtype=output_type, + dump_temp_path=dump_temp_file_path, + working_dir=working_dir, + params=self.params, + ) # uses ogr export to export + if output_type == RawDataOutputType.GEOJSON.value: RawData.query2geojson( self.con, @@ -530,25 +570,7 @@ def extract_current_data(self, exportname): ), dump_temp_file_path, ) # uses own conversion class - elif output_type == RawDataOutputType.PMTILES.value: - geojson_path = os.path.join( - working_dir, - f"{self.params.file_name if self.params.file_name else 'Export'}.geojson", - ) - RawData.query2geojson( - self.con, - raw_currentdata_extraction_query( - self.params, - g_id=grid_id, - c_id=country, - country_export=country_export, - ), - geojson_path, - ) - RawData.geojson2tiles( - geojson_path, dump_temp_file_path, self.params.file_name - ) - elif output_type == RawDataOutputType.SHAPEFILE.value: + if output_type == RawDataOutputType.SHAPEFILE.value: ( point_query, line_query, @@ -572,7 +594,7 @@ def extract_current_data(self, exportname): if self.params.file_name else "Export", ) # using ogr2ogr - else: + if output_type in ["fgb", "gpkg", "sql", "csv"]: RawData.ogr_export( query=raw_currentdata_extraction_query( self.params, From 06962d485f33959ab61cf7cf336a169923f30b5b Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 12 Oct 2023 08:33:22 +0545 Subject: [PATCH 11/14] Added TM Support for exports --- API/api_worker.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/API/api_worker.py b/API/api_worker.py index ea6a97bf..c08e8d59 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -1,5 +1,6 @@ import os import pathlib +import re import shutil import time import zipfile @@ -82,9 +83,19 @@ def process_raw_data(self, params): # check if download url will be generated from s3 or not from config if use_s3_to_upload: file_transfer_obj = S3FileTransfer() + upload_name = exportname if params.uuid else f"Recurring/{exportname}" + if exportname.startswith("hotosm_project"): + if not params.uuid: + pattern = r"(hotosm_project_)(\d+)" + match = re.match(pattern, exportname) + if match: + prefix = match.group(1) + 
project_number = match.group(2) + if project_number: + upload_name = f"TM/{project_number}/{exportname}" download_url = file_transfer_obj.upload( upload_file_path, - exportname if params.uuid else f"Recurring/{exportname}", + upload_name, file_suffix="zip" if bind_zip else params.output_type.lower(), ) else: From bf216e10a4278f801178054c20a6639427ac8b2a Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sat, 14 Oct 2023 13:41:37 +0545 Subject: [PATCH 12/14] add support for hdx exports --- API/api_worker.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/API/api_worker.py b/API/api_worker.py index c08e8d59..601a6920 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -93,6 +93,15 @@ def process_raw_data(self, params): project_number = match.group(2) if project_number: upload_name = f"TM/{project_number}/{exportname}" + elif exportname.startswith("hotosm_"): + if not params.uuid: + pattern = r"hotosm_([A-Z]{3})_(\w+)" + match = re.match(pattern, exportname) + if match: + iso3countrycode = match.group(1) + if iso3countrycode: + upload_name = f"HDX/{iso3countrycode}/{exportname}" + download_url = file_transfer_obj.upload( upload_file_path, upload_name, From 9a84dbe4aa87ba2534a865379a8478908d5a2628 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sat, 14 Oct 2023 13:43:31 +0545 Subject: [PATCH 13/14] Comment addition --- API/api_worker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index 601a6920..930ac111 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -84,7 +84,7 @@ def process_raw_data(self, params): if use_s3_to_upload: file_transfer_obj = S3FileTransfer() upload_name = exportname if params.uuid else f"Recurring/{exportname}" - if exportname.startswith("hotosm_project"): + if exportname.startswith("hotosm_project"): # TM if not params.uuid: pattern = r"(hotosm_project_)(\d+)" match = re.match(pattern, exportname) @@ -93,7 +93,7 @@ def process_raw_data(self, params): project_number = match.group(2) if project_number: upload_name = f"TM/{project_number}/{exportname}" - elif exportname.startswith("hotosm_"): + elif exportname.startswith("hotosm_"): # HDX if not params.uuid: pattern = r"hotosm_([A-Z]{3})_(\w+)" match = re.match(pattern, exportname) From 4c5c53efe6de8ae11bf1b73fa2740710176e1885 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Wed, 1 Nov 2023 12:21:53 +0545 Subject: [PATCH 14/14] added null index query --- backend/sql/null_index.sql | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 backend/sql/null_index.sql diff --git a/backend/sql/null_index.sql b/backend/sql/null_index.sql new file mode 100644 index 00000000..ab8e345a --- /dev/null +++ b/backend/sql/null_index.sql @@ -0,0 +1,7 @@ +CREATE INDEX CONCURRENTLY ways_poly_country_idx_null ON public.ways_poly USING gin (country gin__int_ops) WHERE country <@ ARRAY[0]; + +CREATE INDEX CONCURRENTLY ways_line_country_idx_null ON public.ways_line USING gin (country gin__int_ops) WHERE country <@ ARRAY[0]; + +CREATE INDEX CONCURRENTLY nodes_country_idx_null ON public.nodes USING gin (country gin__int_ops) WHERE country <@ ARRAY[0]; + +CREATE INDEX CONCURRENTLY relations_country_idx_null ON public.relations USING gin (country gin__int_ops) WHERE country <@ ARRAY[0];
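
A note on the S3 key routing introduced in PATCH 11 through PATCH 13: the branching is easy to sanity-check in isolation. The sketch below mirrors that logic as a standalone function — `resolve_upload_name` is an illustrative name, not a helper that exists in `API/api_worker.py` — so the TM and HDX patterns can be exercised without running the Celery worker:

```python
import re


def resolve_upload_name(exportname: str, uuid: bool) -> str:
    """Mirror of the upload-name branching in API/api_worker.py (hypothetical helper)."""
    if uuid:
        # one-off exports keep their unique name at the bucket root
        return exportname
    upload_name = f"Recurring/{exportname}"
    if exportname.startswith("hotosm_project"):  # TM
        # group(2) captures the Tasking Manager project id
        match = re.match(r"(hotosm_project_)(\d+)", exportname)
        if match and match.group(2):
            upload_name = f"TM/{match.group(2)}/{exportname}"
    elif exportname.startswith("hotosm_"):  # HDX
        # group(1) captures the three-letter ISO3 country code
        match = re.match(r"hotosm_([A-Z]{3})_(\w+)", exportname)
        if match and match.group(1):
            upload_name = f"HDX/{match.group(1)}/{exportname}"
    return upload_name


assert resolve_upload_name("my_export", uuid=True) == "my_export"
assert resolve_upload_name("my_export", uuid=False) == "Recurring/my_export"
assert (
    resolve_upload_name("hotosm_project_123_buildings", uuid=False)
    == "TM/123/hotosm_project_123_buildings"
)
assert (
    resolve_upload_name("hotosm_NPL_buildings", uuid=False)
    == "HDX/NPL/hotosm_NPL_buildings"
)
```

Only recurring (non-uuid) exports are re-routed; a name such as `hotosm_npl_buildings` keeps the plain `Recurring/` prefix because the HDX pattern matches uppercase ISO3 country codes only.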