From ec46fc5fb06c091efdbb0193e35ec3ed39ed1b25 Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Sun, 14 Jan 2024 23:14:02 +0545
Subject: [PATCH 1/5] Uses Postgres by default for custom exports, supports
 switching between DuckDB and Postgres

---
 API/hdx.py                              |   4 +-
 docs/src/installation/configurations.md |   2 +
 src/app.py                              | 168 ++++++++++++++++--------
 src/config.py                           |  19 ++-
 src/query_builder/builder.py            | 101 +++++++++-----
 5 files changed, 194 insertions(+), 100 deletions(-)

diff --git a/API/hdx.py b/API/hdx.py
index 5ee7c1f0..72fbe74e 100644
--- a/API/hdx.py
+++ b/API/hdx.py
@@ -397,7 +397,7 @@ async def process_custom_requests(
                     "addr:city",
                     "source",
                 ],
-                "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')",
+                "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR tags['building'] IN ('kindergarten', 'school', 'college', 'university')",
                 "formats": ["geojson"],
             }
         },
@@ -681,7 +681,7 @@ async def process_custom_requests(
                     "addr:city",
                     "source",
                 ],
-                "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')",
+                "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR tags['building'] IN ('kindergarten', 'school', 'college', 'university')",
                 "formats": ["geojson", "shp", "kml"],
             }
         },
diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md
index bbc2317f..125ff052 100644
--- a/docs/src/installation/configurations.md
+++ b/docs/src/installation/configurations.md
@@ -61,6 +61,7 @@ The following are the different configuration options that are accepted.
 | `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | `5` | Rate limit applied to the statistics endpoint. Defaults to 5 requests per minute | OPTIONAL |
 | `DEFAULT_SOFT_TASK_LIMIT` | `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | `7200` | Soft task time limit signal for celery workers in seconds. It will gently remind celery to finish up the task and terminate. Defaults to 2 hours | OPTIONAL |
 | `DEFAULT_HARD_TASK_LIMIT` | `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | `10800` | Hard task time limit signal for celery workers in seconds. It will immediately kill the celery task. Defaults to 3 hours | OPTIONAL |
+| `USE_DUCK_DB_FOR_CUSTOM_EXPORTS` | `USE_DUCK_DB_FOR_CUSTOM_EXPORTS` | `[API_CONFIG]` | `False` | Enable this setting to run custom exports through DuckDB. By default DuckDB is disabled and Postgres is used | OPTIONAL |
 | `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL |
 | `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis/PostgreSQL connection string for the result backend, eg : db+postgresql://username:password@localhost:5432/db_name | OPTIONAL |
 | `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL |
@@ -110,6 +111,7 @@ The following are the different configuration options that are accepted.
| `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | Yes | No |
 | `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | No | Yes |
 | `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | No | Yes |
+| `USE_DUCK_DB_FOR_CUSTOM_EXPORTS` | `[API_CONFIG]` | Yes | Yes |
 | `CELERY_BROKER_URL` | `[CELERY]` | Yes | Yes |
 | `CELERY_RESULT_BACKEND` | `[CELERY]` | Yes | Yes |
 | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | Yes | Yes |
diff --git a/src/app.py b/src/app.py
index 98878301..ded6af17 100644
--- a/src/app.py
+++ b/src/app.py
@@ -61,14 +61,19 @@
     PROCESS_SINGLE_CATEGORY_IN_POSTGRES,
 )
 from src.config import USE_CONNECTION_POOLING as use_connection_pooling
-from src.config import USE_S3_TO_UPLOAD, get_db_connection_params, level
+from src.config import (
+    USE_DUCK_DB_FOR_CUSTOM_EXPORTS,
+    USE_S3_TO_UPLOAD,
+    get_db_connection_params,
+    level,
+)
 from src.config import logger as logging
 from src.query_builder.builder import (
     HDX_FILTER_CRITERIA,
     HDX_MARKDOWN,
     check_exisiting_country,
     check_last_updated_rawdata,
-    extract_features_duckdb,
+    extract_features_custom_exports,
     extract_geometry_type_query,
     generate_polygon_stats_graphql_query,
     get_countries_query,
@@ -88,7 +93,8 @@
 import logging as log

 if ENABLE_HDX_EXPORTS:
-    import duckdb
+    if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True:
+        import duckdb

     from hdx.data.dataset import Dataset
     from hdx.data.resource import Resource
@@ -154,6 +160,37 @@ def dict_none_clean(to_clean):
     return result


+def generate_ogr2ogr_cmd_from_psql(
+    export_file_path,
+    export_file_format_driver,
+    postgres_query,
+    layer_creation_options,
+    query_dump_path,
+):
+    """
+    Generates an ogr2ogr command that exports the result of a PostgreSQL query.
+    """
+    db_items = get_db_connection_params()
+    os.makedirs(query_dump_path, exist_ok=True)
+    query_path = os.path.join(query_dump_path, "query.sql")
+    with open(query_path, "w", encoding="UTF-8") as file:
+        file.write(postgres_query)
+    ogr2ogr_cmd = """ogr2ogr -overwrite -f "{export_format}" {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" {layer_creation_options_str} -progress""".format(
+        export_format=export_file_format_driver,
+        export_path=export_file_path,
+        host=db_items.get("host"),
+        port=db_items.get("port"),
+        username=db_items.get("user"),
+        db=db_items.get("dbname"),
+        password=db_items.get("password"),
+        pg_sql_select=query_path,
+        layer_creation_options_str=f"-lco {layer_creation_options}"
+        if layer_creation_options
+        else "",
+    )
+    return ogr2ogr_cmd
+
+
 def run_ogr2ogr_cmd(cmd):
     """Runs the command and monitors the output file size while the process runs

@@ -1222,11 +1259,13 @@ def __init__(self, params):
         if os.path.exists(self.default_export_path):
             shutil.rmtree(self.default_export_path, ignore_errors=True)
         os.makedirs(self.default_export_path)
-        self.duck_db_db_path = os.path.join(
-            self.default_export_path,
-            f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db",
-        )
-        self.duck_db_instance = DuckDB(self.duck_db_db_path)
+
+        if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True:
+            self.duck_db_db_path = os.path.join(
+                self.default_export_path,
+                f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db",
+            )
+            self.duck_db_instance = DuckDB(self.duck_db_db_path)

     def types_to_tables(self, type_list: list):
         """
@@ -1252,7 +1291,7 @@ def types_to_tables(self, type_list: list):

         return list(table_set)

-    def format_where_clause(self, where_clause):
+    def format_where_clause_duckdb(self, where_clause):
         """
         Formats the where_clause by replacing the first occurrence of the pattern.
@@ -1362,7 +1401,6 @@ def query_to_file(self, query, category_name, feature_type, export_formats): self.default_export_path, category_name, feature_type ) resources = [] - start_export_formats_time = time.time() def process_export_format(export_format): export_format = EXPORT_TYPE_MAPPING.get(export_format) @@ -1388,8 +1426,21 @@ def process_export_format(export_format): if export_format.layer_creation_options else "" ) - executable_query = f"""COPY ({query.strip()}) TO '{export_file_path}' WITH (FORMAT {export_format.format_option}{f", DRIVER '{export_format.driver_name}'{f', LAYER_CREATION_OPTIONS {layer_creation_options_str}' if layer_creation_options_str else ''}" if export_format.format_option == 'GDAL' else ''})""" - self.duck_db_instance.run_query(executable_query.strip(), load_spatial=True) + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: + executable_query = f"""COPY ({query.strip()}) TO '{export_file_path}' WITH (FORMAT {export_format.format_option}{f", DRIVER '{export_format.driver_name}'{f', LAYER_CREATION_OPTIONS {layer_creation_options_str}' if layer_creation_options_str else ''}" if export_format.format_option == 'GDAL' else ''})""" + self.duck_db_instance.run_query( + executable_query.strip(), load_spatial=True + ) + else: + ogr2ogr_cmd = generate_ogr2ogr_cmd_from_psql( + export_file_path=export_file_path, + export_file_format_driver=export_format.driver_name, + postgres_query=query.strip(), + layer_creation_options=layer_creation_options_str, + query_dump_path=export_format_path, + ) + run_ogr2ogr_cmd(ogr2ogr_cmd) + zip_file_path = os.path.join(file_export_path, f"{export_filename}.zip") zip_path = self.file_to_zip(export_format_path, zip_file_path) @@ -1469,12 +1520,13 @@ def process_category(self, category): logging.info("Started Processing %s", category_name) all_uploaded_resources = [] for feature_type in category_data.types: - extract_query = extract_features_duckdb( + extract_query = extract_features_custom_exports( self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, category_data.select, feature_type, - self.format_where_clause(category_data.where), + self.format_where_clause_duckdb(category_data.where) if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True else category_data.where, geometry=self.params.geometry if self.params.geometry else None, + cid=self.cid, ) resources = self.query_to_file( extract_query, @@ -1574,36 +1626,39 @@ def process_hdx_tags(self): self.params.categories = [ category for category in self.params.categories if category ] - table_type = [ - cat_type - for category in self.params.categories - if category - for cat_type in list(category.values())[0].types - ] - where_0_category = None + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: + table_type = [ + cat_type + for category in self.params.categories + if category + for cat_type in list(category.values())[0].types + ] + where_0_category = None - if len(self.params.categories) == 1 and PROCESS_SINGLE_CATEGORY_IN_POSTGRES: - where_0_category = list(self.params.categories[0].values())[0].where + if len(self.params.categories) == 1 and PROCESS_SINGLE_CATEGORY_IN_POSTGRES: + where_0_category = list(self.params.categories[0].values())[0].where - table_names = self.types_to_tables(list(set(table_type))) - base_table_name = self.iso3 if self.iso3 else self.params.dataset.dataset_prefix - for table in table_names: - create_table = postgres2duckdb_query( - base_table_name=base_table_name, - table=table, - cid=self.cid, - geometry=self.params.geometry, - single_category_where=where_0_category, - ) - 
logging.debug(create_table) - start = time.time() - logging.info("Transfer-> Postgres Data to DuckDB Started : %s", table) - self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True) - logging.info( - "Transfer-> Postgres Data to DuckDB : %s Done in %s", - table, - humanize.naturaldelta(timedelta(seconds=(time.time() - start))), + table_names = self.types_to_tables(list(set(table_type))) + base_table_name = ( + self.iso3 if self.iso3 else self.params.dataset.dataset_prefix ) + for table in table_names: + create_table = postgres2duckdb_query( + base_table_name=base_table_name, + table=table, + cid=self.cid, + geometry=self.params.geometry, + single_category_where=where_0_category, + ) + logging.debug(create_table) + start = time.time() + logging.info("Transfer-> Postgres Data to DuckDB Started : %s", table) + self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True) + logging.info( + "Transfer-> Postgres Data to DuckDB : %s Done in %s", + table, + humanize.naturaldelta(timedelta(seconds=(time.time() - start))), + ) CategoryResult = namedtuple( "CategoryResult", ["category", "uploaded_resources"] @@ -1649,21 +1704,22 @@ def process_hdx_tags(self): result = {"datasets": dataset_results} if self.params.meta: - logging.info("Dumping Duck DB to Parquet") - db_dump_path = os.path.join( - self.default_export_path, - "DB_DUMP", - ) - os.makedirs(db_dump_path, exist_ok=True) - export_db = f"""EXPORT DATABASE '{db_dump_path}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 100000);""" - self.duck_db_instance.run_query(export_db, load_spatial=True) - db_zip_download_url = self.upload_to_s3( - self.file_to_zip( - working_dir=db_dump_path, - zip_path=os.path.join(self.default_export_path, "dbdump.zip"), + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: + logging.info("Dumping Duck DB to Parquet") + db_dump_path = os.path.join( + self.default_export_path, + "DB_DUMP", ) - ) - result["db_dump"] = db_zip_download_url + os.makedirs(db_dump_path, exist_ok=True) + export_db = f"""EXPORT DATABASE '{db_dump_path}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 100000);""" + self.duck_db_instance.run_query(export_db, load_spatial=True) + db_zip_download_url = self.upload_to_s3( + self.file_to_zip( + working_dir=db_dump_path, + zip_path=os.path.join(self.default_export_path, "dbdump.zip"), + ) + ) + result["db_dump"] = db_zip_download_url processing_time_close = time.time() result["elapsed_time"] = humanize.naturaldelta( timedelta(seconds=(processing_time_close - processing_time_start)) @@ -1671,7 +1727,7 @@ def process_hdx_tags(self): result["started_at"] = started_at meta_last_run_dump_path = os.path.join(self.default_export_path, "meta.json") - with open(meta_last_run_dump_path, "w") as json_file: + with open(meta_last_run_dump_path, "w", encoding="UTF-8") as json_file: json.dump(result, json_file, indent=4) self.upload_to_s3(resource_path=meta_last_run_dump_path) self.clean_resources() diff --git a/src/config.py b/src/config.py index 89fda680..fe85332a 100644 --- a/src/config.py +++ b/src/config.py @@ -191,6 +191,11 @@ def not_raises(func, *args, **kwargs): "API_CONFIG", "DEFAULT_HARD_TASK_LIMIT", fallback=3 * 60 * 60 ) +USE_DUCK_DB_FOR_CUSTOM_EXPORTS = os.environ.get( + "USE_DUCK_DB_FOR_CUSTOM_EXPORTS" +) or config.getboolean("API_CONFIG", "USE_DUCK_DB_FOR_CUSTOM_EXPORTS", fallback=False) + + HDX_SOFT_TASK_LIMIT = os.environ.get("HDX_SOFT_TASK_LIMIT") or config.get( "HDX", "HDX_SOFT_TASK_LIMIT", fallback=5 * 60 * 60 ) @@ -265,13 +270,13 @@ def not_raises(func, *args, **kwargs): 
            else None
         )
     )
-
-    DUCK_DB_MEMORY_LIMIT = os.environ.get("DUCK_DB_MEMORY_LIMIT") or config.get(
-        "HDX", "DUCK_DB_MEMORY_LIMIT", fallback=None
-    )
-    DUCK_DB_THREAD_LIMIT = os.environ.get("DUCK_DB_THREAD_LIMIT") or config.get(
-        "HDX", "DUCK_DB_THREAD_LIMIT", fallback=None
-    )
+    if USE_DUCK_DB_FOR_CUSTOM_EXPORTS:
+        DUCK_DB_MEMORY_LIMIT = os.environ.get("DUCK_DB_MEMORY_LIMIT") or config.get(
+            "HDX", "DUCK_DB_MEMORY_LIMIT", fallback=None
+        )
+        DUCK_DB_THREAD_LIMIT = os.environ.get("DUCK_DB_THREAD_LIMIT") or config.get(
+            "HDX", "DUCK_DB_THREAD_LIMIT", fallback=None
+        )


 def get_db_connection_params() -> dict:
diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py
index 5268754c..6bdae952 100644
--- a/src/query_builder/builder.py
+++ b/src/query_builder/builder.py
@@ -20,8 +20,9 @@
 import re
 from json import dumps, loads

-from geomet import wkb, wkt
+from geomet import wkt

+from src.config import USE_DUCK_DB_FOR_CUSTOM_EXPORTS
 from src.config import logger as logging
 from src.validation.models import SupportedFilters, SupportedGeometryFilters

@@ -118,14 +119,19 @@ def remove_spaces(input_str):


 def create_column_filter(
-    columns, create_schema=False, output_type="geojson", use_centroid=False
+    columns,
+    create_schema=False,
+    output_type="geojson",
+    use_centroid=False,
+    include_osm_type=True,
 ):
     """generates the column filter that controls which columns appear in the select query of a raw-data extraction"""
     if len(columns) > 0:
         filter_col = []
         filter_col.append("osm_id")
-        filter_col.append("tableoid::regclass AS osm_type")
+        if include_osm_type:
+            filter_col.append("tableoid::regclass AS osm_type")

         if create_schema:
             schema = {}
@@ -147,7 +153,6 @@
             filter_col.append("ST_X(ST_Centroid(geom)) as longitude")
             filter_col.append("ST_Y(ST_Centroid(geom)) as latitude")
             filter_col.append("GeometryType(geom) as geom_type")
-
         else:
             filter_col.append("ST_Centroid(geom) as geom" if use_centroid else "geom")
         select_condition = " , ".join(filter_col)
@@ -732,9 +737,7 @@ def raw_extract_plain_geojson(params, inspect_only=False):
     if params.geometry_type == "polygon":
         geom_filter_cond = """ and (geometrytype(geom)='POLYGON' or geometrytype(geom)='MULTIPOLYGON')"""
     select_condition = create_column_filter(columns=params.select)
-    where_condition = generate_tag_filter_query(
-        params.where, params.join_by, user_for_geojson=True
-    )
+    where_condition = generate_tag_filter_query(params.where, params.join_by)
     if params.bbox:
         xmin, ymin, xmax, ymax = (
             params.bbox[0],
@@ -856,6 +859,16 @@ def get_country_from_iso(iso3):
     return query


+def convert_tags_pattern_to_postgres(query_string):
+    """Converts DuckDB-style tags['key'] lookups into Postgres tags->>'key' accesses."""
+    pattern = r"tags\['(.*?)'\]"
+
+    converted_query = re.sub(
+        pattern, lambda match: f"tags->>'{match.group(1)}'", query_string
+    )
+
+    return converted_query
+
+
 def postgres2duckdb_query(
     base_table_name,
     table,
@@ -878,23 +891,12 @@ def postgres2duckdb_query(
     Returns:
         str: DuckDB query for creating a table.
""" - select_query = """osm_id, osm_type, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" - create_select_duck_db = """osm_id, osm_type , version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry""" + select_query = """osm_id, osm_type, version, changeset, timestamp, tags, ST_AsBinary(geom) as geom""" + create_select_duck_db = """osm_id, osm_type , version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geom) as GEOMETRY) AS geom""" if enable_users_detail: - select_query = """osm_id, osm_type, uid, user, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" - create_select_duck_db = """osm_id, osm_type, uid, user, version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry""" - - def convert_tags_pattern(query_string): - # Define the pattern to search for - pattern = r"tags\['(.*?)'\]" - - # Use a lambda function as the replacement to convert the pattern - converted_query = re.sub( - pattern, lambda match: f"tags->>'{match.group(1)}'", query_string - ) - - return converted_query + select_query = """osm_id, osm_type, uid, user, version, changeset, timestamp, tags, ST_AsBinary(geom) as geom""" + create_select_duck_db = """osm_id, osm_type, uid, user, version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geom) as GEOMETRY) AS geom""" row_filter_condition = ( f"""(country <@ ARRAY [{cid}])""" @@ -904,16 +906,34 @@ def convert_tags_pattern(query_string): postgres_query = f"""select {select_query} from (select * , tableoid::regclass as osm_type from {table} where {row_filter_condition}) as sub_query""" if single_category_where: - postgres_query += f" where {convert_tags_pattern(single_category_where)}" + postgres_query += ( + f" where {convert_tags_pattern_to_postgres(single_category_where)}" + ) duck_db_create = f"""CREATE TABLE {base_table_name}_{table} AS SELECT {create_select_duck_db} FROM postgres_query("postgres_db", "{postgres_query}") """ return duck_db_create -def extract_features_duckdb(base_table_name, select, feature_type, where, geometry): +def extract_custom_features_from_postgres( + select_q, from_q, where_q, geom=None, cid=None +): + """ + Generates Postgresql query for custom feature extraction """ - Generate a DuckDB query to extract features based on given parameters. + geom_filter = f"""(country <@ ARRAY [{cid}])""" if cid else create_geom_filter(geom) + + postgres_query = f"""select {select_q} from (select * , tableoid::regclass as osm_type from {from_q} where {geom_filter}) as sub_query""" + if where_q: + postgres_query += f" where {convert_tags_pattern_to_postgres(where_q)}" + return postgres_query + + +def extract_features_custom_exports( + base_table_name, select, feature_type, where, geometry=None, cid=None +): + """ + Generate a Extraction query to extract features based on given parameters. Args: - base_table_name (str): Base table name. @@ -922,7 +942,7 @@ def extract_features_duckdb(base_table_name, select, feature_type, where, geomet - where (str): SQL-like condition to filter features. Returns: - str: DuckDB query to extract features. + str: Extraction query to extract features. 
""" map_tables = { "points": {"table": ["nodes"], "where": {"nodes": f"({where})"}}, @@ -930,30 +950,41 @@ def extract_features_duckdb(base_table_name, select, feature_type, where, geomet "table": ["ways_line", "relations"], "where": { "ways_line": where, - "relations": f"({where}) and (ST_GeometryType(geometry)='MULTILINESTRING')", + "relations": f"({where}) and (ST_GeometryType(geom)='MULTILINESTRING')", }, }, "polygons": { "table": ["ways_poly", "relations"], "where": { "ways_poly": where, - "relations": f"({where}) and (ST_GeometryType(geometry)='MULTIPOLYGON' or ST_GeometryType(geometry)='POLYGON')", + "relations": f"({where}) and (ST_GeometryType(geom)='MULTIPOLYGON' or ST_GeometryType(geom)='POLYGON')", }, }, } - - select = [f"tags['{item}'][1] as '{item}'" for item in select] - select += ["osm_id", "osm_type", "geometry"] - select_query = ", ".join(select) + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS: + select = [f"tags['{item}'][1] as '{item}'" for item in select] + select += ["osm_id", "osm_type", "geom"] + select_query = ", ".join(select) + else: + select_query = create_column_filter(select, include_osm_type=False) from_query = map_tables[feature_type]["table"] base_query = [] for table in from_query: where_query = map_tables[feature_type]["where"][table] - if geometry: - where_query += f" and (ST_Intersects(geometry,ST_GeomFromGeoJSON('{geometry.json()}')))" - query = f"""select {select_query} from {f"{base_table_name}_{table}"} where {where_query}""" + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS: + if geometry: + where_query += f" and (ST_Intersects(geom,ST_GeomFromGeoJSON('{geometry.json()}')))" + query = f"""select {select_query} from {f"{base_table_name}_{table}"} where {where_query}""" + else: + query = extract_custom_features_from_postgres( + select_q=select_query, + from_q=table, + where_q=where_query, + geom=geometry, + cid=cid, + ) base_query.append(query) return " UNION ALL ".join(base_query) From 4645c9fbea7fa09a9a10b83524fa7e267c75ae6f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 14 Jan 2024 23:18:11 +0545 Subject: [PATCH 2/5] Format app.py --- src/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index ded6af17..f4ea545b 100644 --- a/src/app.py +++ b/src/app.py @@ -1524,7 +1524,9 @@ def process_category(self, category): self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, category_data.select, feature_type, - self.format_where_clause_duckdb(category_data.where) if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True else category_data.where, + self.format_where_clause_duckdb(category_data.where) + if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True + else category_data.where, geometry=self.params.geometry if self.params.geometry else None, cid=self.cid, ) From f1dcb79d934e0d3606752ca3cdbff03b8b227dbe Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 14 Jan 2024 23:27:59 +0545 Subject: [PATCH 3/5] Fix Unit tests --- src/app.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/app.py b/src/app.py index f4ea545b..510f73e2 100644 --- a/src/app.py +++ b/src/app.py @@ -95,16 +95,14 @@ if ENABLE_HDX_EXPORTS: if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: import duckdb + from src.config import ( + DUCK_DB_MEMORY_LIMIT, + DUCK_DB_THREAD_LIMIT,) + from hdx.data.dataset import Dataset from hdx.data.resource import Resource - from src.config import ( - DUCK_DB_MEMORY_LIMIT, - DUCK_DB_THREAD_LIMIT, - HDX_MAINTAINER, - HDX_OWNER_ORG, - HDX_URL_PREFIX, - ) + from src.config import HDX_MAINTAINER, HDX_OWNER_ORG, 
HDX_URL_PREFIX global LOCAL_CON_POOL From aac3a6b685b870025150eff54c918e795fe6e5db Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 14 Jan 2024 23:32:14 +0545 Subject: [PATCH 4/5] Format app.py using black --- src/app.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/app.py b/src/app.py index 510f73e2..13f922dd 100644 --- a/src/app.py +++ b/src/app.py @@ -96,8 +96,9 @@ if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: import duckdb from src.config import ( - DUCK_DB_MEMORY_LIMIT, - DUCK_DB_THREAD_LIMIT,) + DUCK_DB_MEMORY_LIMIT, + DUCK_DB_THREAD_LIMIT, + ) from hdx.data.dataset import Dataset from hdx.data.resource import Resource From 49c3ab2cc418cb1f447428ee2f04fc48ac22eec0 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Sun, 14 Jan 2024 23:36:25 +0545 Subject: [PATCH 5/5] Fix typo on where filter --- tests/test_API.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_API.py b/tests/test_API.py index a4e43863..f1a7dee6 100644 --- a/tests/test_API.py +++ b/tests/test_API.py @@ -1201,7 +1201,7 @@ def test_full_hdx_set_iso(): "addr:city", "source", ], - "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')", + "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR tags['building'] IN ('kindergarten', 'school', 'college', 'university')", "formats": ["geojson"], } },
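
The notes below sketch the moving parts this series introduces. All paths, table names, and credentials in the snippets are placeholders, and each snippet illustrates the mechanism rather than reproducing code from the patches unless it says otherwise.

On the Postgres path, `generate_ogr2ogr_cmd_from_psql` dumps the SQL into a `query.sql` file and points ogr2ogr at it with GDAL's `-sql @filename` syntax. A minimal sketch of calling it, assuming a configured deployment where `src.app` imports cleanly and `get_db_connection_params()` can read the database settings:

```python
from src.app import generate_ogr2ogr_cmd_from_psql

# Illustrative arguments; the real caller is process_export_format in query_to_file.
cmd = generate_ogr2ogr_cmd_from_psql(
    export_file_path="/tmp/exports/schools.geojson",
    export_file_format_driver="GeoJSON",
    postgres_query="select osm_id, geom from ways_poly where tags->>'amenity' = 'school'",
    layer_creation_options="",  # non-empty values get wrapped as "-lco <options>"
    query_dump_path="/tmp/exports",
)
print(cmd)
# Roughly: ogr2ogr -overwrite -f "GeoJSON" /tmp/exports/schools.geojson
#   PG:"host=... port=... user=... dbname=... password=..."
#   -sql @"/tmp/exports/query.sql" -progress
```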
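Both backends can run the same `where` filters because `convert_tags_pattern_to_postgres` rewrites DuckDB-style map lookups into Postgres JSON accesses. The function below copies the regex from the patch so its behaviour can be checked stand-alone:

```python
import re


def convert_tags_pattern_to_postgres(query_string):
    """Rewrite DuckDB map lookups (tags['key']) as Postgres tags->>'key' accesses."""
    pattern = r"tags\['(.*?)'\]"
    return re.sub(pattern, lambda match: f"tags->>'{match.group(1)}'", query_string)


where = "tags['amenity'] IN ('school') OR tags['building'] IN ('school')"
print(convert_tags_pattern_to_postgres(where))
# tags->>'amenity' IN ('school') OR tags->>'building' IN ('school')
```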
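On the DuckDB path, `query_to_file` writes files through DuckDB spatial's `COPY ... TO` with `FORMAT GDAL`. A stand-alone sketch of that statement shape, assuming the `duckdb` package can fetch the spatial extension in your environment and using a throwaway table:

```python
import duckdb

con = duckdb.connect()
con.install_extension("spatial")
con.load_extension("spatial")
con.execute("CREATE TABLE pts AS SELECT 1 AS osm_id, ST_Point(85.3, 27.7) AS geom")
# Same shape as the executable_query assembled in query_to_file for GDAL formats:
con.execute(
    "COPY (SELECT * FROM pts) TO '/tmp/pts.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON')"
)
```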
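The Postgres -> DuckDB transfer in `process_hdx_tags` leans on DuckDB's `postgres` extension: the raw-data database is attached under the alias `postgres_db` (which is what `run_query(..., attach_pgsql=True)` appears to set up) and is then read through the `postgres_query` table function used by `postgres2duckdb_query`. A sketch with a placeholder connection string:

```python
import duckdb

con = duckdb.connect("exports.db")
con.install_extension("postgres")
con.load_extension("postgres")
# The alias must be "postgres_db" to match the name hard-coded in postgres2duckdb_query().
con.execute(
    "ATTACH 'host=localhost dbname=raw user=postgres' AS postgres_db (TYPE POSTGRES)"
)
con.execute(
    "CREATE TABLE npl_nodes AS "
    "SELECT * FROM postgres_query('postgres_db', 'select osm_id, tags from nodes limit 5')"
)
```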
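One caveat on the new flag in `src/config.py`: `os.environ.get` returns a string, so under the `env_value or config.getboolean(...)` pattern any non-empty environment value, including `"false"`, enables DuckDB. A hedged sketch of explicit normalization; the `str_to_bool` helper is illustrative and not part of the patch:

```python
import os
from configparser import ConfigParser

config = ConfigParser()
config.read("config.txt")  # the project's INI config; path assumed


def str_to_bool(value: str) -> bool:
    """Interpret common truthy strings the way configparser.getboolean() does."""
    return value.strip().lower() in ("1", "true", "yes", "on")


env_value = os.environ.get("USE_DUCK_DB_FOR_CUSTOM_EXPORTS")
USE_DUCK_DB_FOR_CUSTOM_EXPORTS = (
    str_to_bool(env_value)
    if env_value is not None
    else config.getboolean(
        "API_CONFIG", "USE_DUCK_DB_FOR_CUSTOM_EXPORTS", fallback=False
    )
)
```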