Skip to content

Commit

Permalink
Get only filtered feature if it is a single category
Browse files Browse the repository at this point in the history
  • Loading branch information
kshitijrajsharma committed Dec 25, 2023
1 parent b14a747 commit 88c4c23
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 9 deletions.
20 changes: 14 additions & 6 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,20 +1564,28 @@ def process_hdx_tags(self):
for category in self.params.categories
for cat_type in list(category.values())[0].types
]
where_0_category = None
if len(self.params.categories) == 1:
where_0_category = list(self.params.categories[0].values())[0].where

table_names = self.types_to_tables(list(set(table_type)))
base_table_name = self.iso3 if self.iso3 else self.params.dataset.dataset_prefix
for table in table_names:
create_table = postgres2duckdb_query(
base_table_name,
table,
self.cid,
self.params.geometry,
base_table_name=base_table_name,
table=table,
cid=self.cid,
geometry=self.params.geometry,
single_category_where=where_0_category,
)
print(create_table)
start = time.time()
logging.info(f"Transfer-> Postgres Data to DuckDB Started : {table}")
logging.info("Transfer-> Postgres Data to DuckDB Started : %s", table)
self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True)
logging.info(
f"Transfer-> Postgres Data to DuckDB : {table} Done in {humanize.naturaldelta(timedelta(seconds=(time.time()-start)))}s"
"Transfer-> Postgres Data to DuckDB : %s Done in %s s",
table,
humanize.naturaldelta(timedelta(seconds=(time.time() - start))),
)

CategoryResult = namedtuple(
Expand Down
25 changes: 22 additions & 3 deletions src/query_builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,12 @@ def get_country_from_iso(iso3):


def postgres2duckdb_query(
base_table_name, table, cid=None, geometry=None, enable_users_detail=False
base_table_name,
table,
cid=None,
geometry=None,
single_category_where=None,
enable_users_detail=False,
):
"""
Generate a DuckDB query to create a table from a PostgreSQL query.
Expand All @@ -866,6 +871,7 @@ def postgres2duckdb_query(
- table (str): PostgreSQL table name.
- cid (int, optional): Country ID for filtering. Defaults to None.
- geometry (Polygon, optional): Custom polygon geometry. Defaults to None.
- single_category_where (str, optional): Where clause of the single requested category; when given, it is appended to the Postgres row filter. Defaults to None.
- enable_users_detail (bool, optional): Enable user details. Defaults to False.
Returns:
Expand All @@ -878,11 +884,24 @@ def postgres2duckdb_query(
select_query = """osm_id, tableoid::regclass AS type, uid, user, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry"""
create_select_duck_db = """osm_id, type, uid, user, version, changeset, timestamp,type, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry"""

def convert_tags_pattern(query_string):
    """Rewrite each ``tags['key']`` lookup in *query_string* as ``tags->>'key'``.

    Text that does not match the ``tags['...']`` shape is returned untouched.
    """

    def to_arrow_accessor(found):
        # group(1) is the key captured between the quotes of tags['...'].
        return f"tags->>'{found.group(1)}'"

    return re.sub(r"tags\['(.*?)'\]", to_arrow_accessor, query_string)

row_filter_condition = (
f"""country <@ ARRAY [{cid}]"""
f"""(country <@ ARRAY [{cid}])"""
if cid
else f"""ST_within(geom,ST_GeomFromText('{wkt.dumps(loads(geometry.json()))}',4326))"""
else f"""(ST_within(geom,ST_GeomFromText('{wkt.dumps(loads(geometry.json()))}',4326)))"""
)
if single_category_where:
row_filter_condition += f" and ({convert_tags_pattern(single_category_where)})"

duck_db_create = f"""CREATE TABLE {base_table_name}_{table} AS SELECT {create_select_duck_db} FROM postgres_query("postgres_db", "SELECT {select_query} FROM {table} WHERE {row_filter_condition}") """

Expand Down

0 comments on commit 88c4c23

Please sign in to comment.