From 1e2d02527fdd44fe5a4ede7b65f2a875f5ee269f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 19 Dec 2023 16:30:56 +0545 Subject: [PATCH 01/20] Added HDX exporter class and converted legacy yaml to json format --- API/hdx.py | 440 +++++++++++++++++++++++++++++++++++ API/main.py | 2 + API/stats.py | 2 +- requirements.txt | 5 +- src/app.py | 248 +++++++++++++++++++- src/config.py | 20 ++ src/query_builder/builder.py | 68 ++++++ src/validation/models.py | 36 ++- 8 files changed, 804 insertions(+), 17 deletions(-) create mode 100644 API/hdx.py diff --git a/API/hdx.py b/API/hdx.py new file mode 100644 index 00000000..3d8703b8 --- /dev/null +++ b/API/hdx.py @@ -0,0 +1,440 @@ +from enum import Enum +from typing import Dict, List + +from fastapi import APIRouter, Body, Query, Request +from fastapi_versioning import version +from pydantic import BaseModel, Field, validator + +from src.app import HDX +from src.config import LIMITER as limiter +from src.config import RATE_LIMIT_PER_MIN + +router = APIRouter(prefix="/hdx", tags=["HDX"]) + + +class HDXModel(BaseModel): + tags: List[str] = Field( + ..., + description="List of tags for the HDX model.", + example=["roads", "transportation", "geodata"], + ) + caveats: str = Field( + ..., + description="Caveats for the HDX model.", + example="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + ) + + +class CategoryModel(BaseModel): + hdx: HDXModel + types: List[str] = Field( + ..., + description="List of feature types (points, lines, polygons).", + example=["lines"], + ) + select: List[str] = Field( + ..., + description="List of selected fields.", + example=["name", "highway"], + ) + where: str = Field( + ..., + description="SQL-like condition to filter features.", + example="highway IS NOT NULL", + ) + formats: List[str] = Field( + ..., + description="List of Export Formats (suffixes).", + example=["gpkg", "fgb"], + ) + + @validator("types") + def validate_types(cls, value): + allowed_types = {"points", "lines", "polygons"} + for item in value: + if item not in allowed_types: + raise ValueError( + f"Invalid type: {item}. 
Allowed types are {', '.join(allowed_types)}" + ) + return value + + @validator("formats") + def validate_export_types(cls, value): + for export_type in value: + if export_type not in EXPORT_TYPE_MAPPING: + raise ValueError(f"Unsupported export type: {export_type}") + return [EXPORT_TYPE_MAPPING[export_type] for export_type in value] + + +class ExportTypeInfo: + def __init__(self, suffix, driver_name, layer_creation_options, format_option): + self.suffix = suffix + self.driver_name = driver_name + self.layer_creation_options = layer_creation_options + self.format_option = format_option + + +EXPORT_TYPE_MAPPING = { + "geojson": ExportTypeInfo("geojson", "GeoJSON", [], "GDAL"), + "shp": ExportTypeInfo("shp", "ESRI Shapefile", [], "GDAL"), + "gpkg": ExportTypeInfo("gpkg", "GeoPackage", [], "GDAL"), + "sqlite": ExportTypeInfo("sqlite", "SQLite", [], "GDAL"), + "fgb": ExportTypeInfo("fgb", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), + "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), + "kl": ExportTypeInfo("kml", "KML", [], "GDAL"), + "gpx": ExportTypeInfo("gpx", "GPX", [], "GDAL"), + "parquet": ExportTypeInfo("parquet", "PARQUET", [], "PARQUET"), +} + + +class DynamicCategoriesModel(BaseModel): + iso3: str = Field( + ..., + description="ISO3 Country Code.", + min_length=3, + max_length=3, + example="USA", + ) + + categories: List[Dict[str, CategoryModel]] = Field( + ..., + description="List of dynamic categories.", + example=[ + { + "Roads": { + "hdx": { + "tags": ["roads", "transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines", "polygons"], + "select": ["name", "highway"], + "where": "highway IS NOT NULL", + "formats": ["fgb"], + } + } + ], + ) + + +@router.post("/submit/") +@limiter.limit(f"{RATE_LIMIT_PER_MIN}/minute") +@version(1) +async def process_data( + request: Request, + params: DynamicCategoriesModel = Body( + ..., + description="Input parameters including ISO3 country code and dynamic categories.", + examples={ + "normal": { + "summary": "Example: Road extraction set", + "description": "Query to extract road in Nepal", + "value": { + "iso3": "NPL", + "categories": [ + { + "Roads": { + "hdx": { + "tags": ["roads", "transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines"], + "select": ["name", "highway"], + "where": "tags['highway'][1] IS NOT NULL", + "formats": ["fgb"], + } + } + ], + }, + }, + "fullset": { + "summary": "Full HDX Dataset default", + "description": "Full yaml conversion for dataset", + "value": { + "iso3": "NPL", + "categories": [ + { + "Buildings": { + "hdx": { + "tags": [ + "facilities-infrastructure", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["polygons"], + "select": [ + "name", + "building", + "building:levels", + "building:materials", + "addr:full", + "addr:housenumber", + "addr:street", + "addr:city", + "office", + "source", + ], + "where": "tags['building'][1] IS NOT NULL", + "formats": ["fgb"], + } + }, + { + "Roads": { + "hdx": { + "tags": ["transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines"], + "select": [ + "name", + "highway", + "surface", + "smoothness", + "width", + "lanes", + "oneway", + "bridge", + "layer", + "source", + ], + "where": "tags['highway'][1] IS NOT NULL", + "formats": 
["fgb"], + } + }, + { + "Waterways": { + "hdx": { + "tags": ["hydrology", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines", "polygons"], + "select": [ + "name", + "waterway", + "covered", + "width", + "depth", + "layer", + "blockage", + "tunnel", + "natural", + "water", + "source", + ], + "where": "tags['waterway'][1] IS NOT NULL OR tags['water'][1] IS NOT NULL OR tags['natural'][1] IN ('water','wetland','bay')", + "formats": ["fgb"], + } + }, + { + "Points of Interest": { + "hdx": { + "tags": [ + "facilities-infrastructure", + "points-of-interest-poi", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "polygons"], + "select": [ + "name", + "amenity", + "man_made", + "shop", + "tourism", + "opening_hours", + "beds", + "rooms", + "addr:full", + "addr:housenumber", + "addr:street", + "addr:city", + "source", + ], + "where": "tags['amenity'][1] IS NOT NULL OR tags['man_made'][1] IS NOT NULL OR tags['shop'][1] IS NOT NULL OR tags['tourism'][1] IS NOT NULL", + "formats": ["fgb"], + } + }, + { + "Airports": { + "hdx": { + "tags": [ + "aviation", + "facilities-infrastructure", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "lines", "polygons"], + "select": [ + "name", + "aeroway", + "building", + "emergency", + "emergency:helipad", + "operator:type", + "capacity:persons", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['aeroway'][1] IS NOT NULL OR tags['building'][1] = 'aerodrome' OR tags['emergency:helipad'][1] IS NOT NULL OR tags['emergency'][1] = 'landing_site'", + "formats": ["fgb"], + } + }, + { + "Sea Ports": { + "hdx": { + "tags": [ + "facilities-infrastructure", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "lines", "polygons"], + "select": [ + "name", + "amenity", + "building", + "port", + "operator:type", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['amenity'][1] = 'ferry_terminal' OR tags['building'][1] = 'ferry_terminal' OR tags['port'][1] IS NOT NULL", + "formats": ["fgb"], + } + }, + { + "Education Facilities": { + "hdx": { + "tags": [ + "education-facilities-schools", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "polygons"], + "select": [ + "name", + "amenity", + "building", + "operator:type", + "capacity:persons", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['amenity'][1] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')", + "formats": ["fgb"], + } + }, + { + "Health Facilities": { + "hdx": { + "tags": ["geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "polygons"], + "select": [ + "name", + "amenity", + "building", + "healthcare", + "healthcare:speciality", + "operator:type", + "capacity:persons", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['healthcare'][1] IS NOT NULL OR tags['amenity'][1] IN ('doctors', 'dentist', 'clinic', 'hospital', 'pharmacy')", + "formats": ["fgb"], + } + }, + { + "Populated Places": { + "hdx": { + "tags": [ + "populated-places-settlements", + "geodata", + ], + "caveats": "OpenStreetMap data 
is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points"], + "select": [ + "name", + "place", + "population", + "is_in", + "source", + ], + "where": "tags['place'][1] IN ('isolated_dwelling', 'town', 'village', 'hamlet', 'city')", + "formats": ["fgb"], + } + }, + { + "Financial Services": { + "hdx": { + "tags": ["economics", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["points", "polygons"], + "select": [ + "name", + "amenity", + "operator", + "network", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['amenity'][1] IN ('mobile_money_agent','bureau_de_change','bank','microfinance','atm','sacco','money_transfer','post_office')", + "formats": ["fgb"], + } + }, + { + "Railways": { + "hdx": { + "tags": [ + "facilities-infrastructure", + "railways", + "transportation", + "geodata", + ], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines"], + "select": [ + "name", + "railway", + "ele", + "operator:type", + "layer", + "addr:full", + "addr:city", + "source", + ], + "where": "tags['railway'][1] IN ('rail','station')", + "formats": ["fgb"], + } + }, + ], + }, + }, + }, + ), +): + """ + Process data based on dynamic categories. + + Args: + request: FastAPI Request object. + params (DynamicCategoriesModel): Input parameters including ISO3 country code and dynamic categories. + + Returns: + dict: Result message. + """ + hdx_set = HDX(params.iso3).process_hdx_tags(params) + return {"message": "Data processed successfully"} diff --git a/API/main.py b/API/main.py index 4c9ed793..c72ec28b 100644 --- a/API/main.py +++ b/API/main.py @@ -40,6 +40,7 @@ from src.db_session import database_instance from .auth.routers import router as auth_router +from .hdx import router as hdx_router from .raw_data import router as raw_data_router from .tasks import router as tasks_router @@ -66,6 +67,7 @@ app.include_router(auth_router) app.include_router(raw_data_router) app.include_router(tasks_router) +app.include_router(hdx_router) if ENABLE_POLYGON_STATISTICS_ENDPOINTS: app.include_router(stats_router) diff --git a/API/stats.py b/API/stats.py index 8fa5ec24..302bc163 100644 --- a/API/stats.py +++ b/API/stats.py @@ -22,6 +22,6 @@ async def get_polygon_stats(request: Request, params: StatsRequestParams): Returns: dict: A dictionary containing statistics for the specified polygon. """ - generator = PolygonStats(params.geometry) + generator = PolygonStats(params.geometry, params.iso3) return generator.get_summary_stats() diff --git a/requirements.txt b/requirements.txt index ed1904dc..75a706ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,4 +34,7 @@ neoteroi-mkdocs==0.1.2 pdocs==1.0.1 ##sozipfile -sozipfile==0.3.2 \ No newline at end of file +sozipfile==0.3.2 + +##duckdb +duckdb==0.9.2 \ No newline at end of file diff --git a/src/app.py b/src/app.py index 7b002057..59698508 100644 --- a/src/app.py +++ b/src/app.py @@ -17,19 +17,27 @@ # 1100 13th Street NW Suite 800 Washington, D.C. 
20005 # """Page contains Main core logic of app""" +import concurrent.futures import os +import pathlib +import re +import shutil import subprocess import sys -import threading import time +import uuid from datetime import datetime +from datetime import datetime as dt +from datetime import timezone from json import dumps from json import loads as json_loads import boto3 +import duckdb import humanize import orjson import requests +import sozipfile.sozipfile as zipfile from area import area from fastapi import HTTPException from geojson import FeatureCollection @@ -40,6 +48,7 @@ AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME, + ENABLE_POLYGON_STATISTICS_ENDPOINTS, ENABLE_TILES, EXPORT_MAX_AREA_SQKM, ) @@ -47,17 +56,21 @@ from src.config import INDEX_THRESHOLD as index_threshold from src.config import POLYGON_STATISTICS_API_URL from src.config import USE_CONNECTION_POOLING as use_connection_pooling -from src.config import get_db_connection_params, level +from src.config import USE_S3_TO_UPLOAD, get_db_connection_params, level from src.config import logger as logging from src.query_builder.builder import ( check_exisiting_country, check_last_updated_rawdata, + extract_features_duckdb, extract_geometry_type_query, generate_polygon_stats_graphql_query, get_countries_query, + get_country_from_iso, get_country_geojson, + get_country_geom_from_iso, get_country_id_query, get_osm_feature_query, + postgres2duckdb_query, raw_currentdata_extraction_query, raw_extract_plain_geojson, ) @@ -96,6 +109,11 @@ def print_psycopg2_exception(err): raise err +def convert_dict_to_conn_str(db_dict): + conn_str = " ".join([f"{key}={value}" for key, value in db_dict.items()]) + return conn_str + + def check_for_json(result_str): """Check if the Payload is a JSON document @@ -866,13 +884,14 @@ def get_bucket_location(self, bucket_name): raise ex return bucket_location or "us-east-1" - def upload(self, file_path, file_name, file_suffix="zip"): + def upload(self, file_path, file_name, file_suffix=None): """Used for transferring file to s3 after reading path from the user , It will wait for the upload to complete Parameters :file_path --- your local file path to upload , file_prefix -- prefix for the filename which is stored sample function call : S3FileTransfer.transfer(file_path="exports",file_prefix="upload_test")""" - file_name = f"{file_name}.{file_suffix}" + if file_suffix: + file_name = f"{file_name}.{file_suffix}" logging.debug("Started Uploading %s from %s", file_name, file_path) # instantiate upload start_time = time.time() @@ -894,7 +913,7 @@ def upload(self, file_path, file_name, file_suffix="zip"): class PolygonStats: """Generates stats for polygon""" - def __init__(self, geojson): + def __init__(self, geojson=None, iso3=None): """ Initialize PolygonStats with the provided GeoJSON. @@ -902,7 +921,22 @@ def __init__(self, geojson): geojson (dict): GeoJSON representation of the polygon. 
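            iso3 (str, optional): ISO3 country code; when supplied, the country
                geometry is looked up from the database instead of using geojson.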
""" self.API_URL = POLYGON_STATISTICS_API_URL - self.INPUT_GEOM = dumps(geojson) + if geojson is None and iso3 is None: + raise HTTPException( + status_code=404, detail="Either geojson or iso3 should be passed" + ) + + if iso3: + dbdict = get_db_connection_params() + d_b = Database(dbdict) + con, cur = d_b.connect() + cur.execute(get_country_geom_from_iso(iso3)) + result = cur.fetchone() + if result is None: + raise HTTPException(status_code=404, detail="Invalid iso3 code") + self.INPUT_GEOM = result[0] + else: + self.INPUT_GEOM = dumps(geojson) @staticmethod def get_building_pattern_statement( @@ -1063,3 +1097,205 @@ def get_summary_stats(self): } return return_stats + + +class DuckDB: + def __init__(self, db_path): + dbdict = get_db_connection_params() + self.db_con_str = convert_dict_to_conn_str(db_dict=dbdict) + self.db_path = db_path + if os.path.exists(self.db_path): + os.remove(self.db_path) + con = duckdb.connect(self.db_path) + con.sql(f"""ATTACH '{self.db_con_str}' AS postgres_db (TYPE POSTGRES)""") + con.install_extension("spatial") + con.install_extension("json") + con.load_extension("spatial") + con.load_extension("json") + + def run_query(self, query, attach_pgsql=False, load_spatial=False): + with duckdb.connect(self.db_path) as con: + if attach_pgsql: + con.execute( + f"""ATTACH '{self.db_con_str}' AS postgres_db (TYPE POSTGRES)""" + ) + load_spatial = True + if load_spatial: + con.load_extension("spatial") + # con.load_extension("json") + con.execute(query) + + +class HDXUploader: + def __init__(self, dataset_prefix, export_url, category): + self.dataset_prefix = dataset_prefix + self.export_url = export_url + self.category = category + + # def + + +class HDX: + def __init__(self, ISO3): + self.iso3 = ISO3.lower() + dbdict = get_db_connection_params() + d_b = Database(dbdict) + con, cur = d_b.connect() + cur.execute(get_country_from_iso(self.iso3)) + result = cur.fetchall()[0] + if not result: + raise HTTPException(status_code=404, detail="Invalid iso3 code") + + ( + self.cid, + self.dataset_name, + self.dataset_prefix, + self.dataset_locations, + ) = result + + self.uuid = str(uuid.uuid4()) + self.default_export_path = os.path.join( + export_path, self.uuid, "HDX", self.iso3.upper() + ) + if os.path.exists(self.default_export_path): + shutil.rmtree(self.default_export_path) + os.makedirs(self.default_export_path) + self.duck_db_instance = DuckDB( + os.path.join(self.default_export_path, f"{self.iso3}.db") + ) + + def types_to_tables(self, type_list: list): + mapping = { + "points": ["nodes"], + "lines": ["ways_line", "relations"], + "polygons": ["ways_poly", "relations"], + } + + table_set = set() + + for t in type_list: + if t in mapping: + table_set.update(mapping[t]) + + return list(table_set) + + def format_where_clause(self, where_clause): + pattern = r"tags\['([^']+)'\]\[1\]" + match = re.search(pattern, where_clause) + + if match: + key = match.group(1) + return where_clause.replace(match.group(0), key) + else: + return where_clause + + # def s3url_to_hdx(self , url, category): + + def zip_to_s3(self, zip_path): + s3_upload_name = os.path.relpath(zip_path, os.path.join(export_path, self.uuid)) + + if not USE_S3_TO_UPLOAD: + raise HTTPException( + status_code=404, detail="S3 Export service is disabled on server" + ) + file_transfer_obj = S3FileTransfer() + download_url = file_transfer_obj.upload( + zip_path, + str(s3_upload_name), + ) + return download_url + # if ENABLE_POLYGON_STATISTICS_ENDPOINTS: + # polygon_stats = 
PolygonStats(iso3=self.iso3).get_summary_stats() + # readme_content += f'{polygon_stats["summary"]["building"]}\n' + # readme_content += f'{polygon_stats["summary"]["road"]}\n' + # readme_content += "Read about what this summary means: indicators: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md,metrics: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md" + + def file_to_zip(self, working_dir, zip_path): + zf = zipfile.ZipFile( + zip_path, + "w", + compression=zipfile.ZIP_DEFLATED, + chunk_size=zipfile.SOZIP_DEFAULT_CHUNK_SIZE, + ) + for file_path in pathlib.Path(working_dir).iterdir(): + zf.write(file_path, arcname=file_path.name) + utc_now = dt.now(timezone.utc) + utc_offset = utc_now.strftime("%z") + # Adding metadata readme.txt + readme_content = f"Exported Timestamp (UTC{utc_offset}): {utc_now.strftime('%Y-%m-%d %H:%M:%S')}\n" + readme_content += "Exported through Raw-data-api (https://github.com/hotosm/raw-data-api) using OpenStreetMap data.\n" + readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about \n" + zf.writestr("Readme.txt", readme_content) + zf.close() + shutil.rmtree(working_dir) + return zip_path + + def query_to_file(self, query, category_name, feature_type, export_formats): + category_name = category_name.lower().replace(" ", "_") + file_export_path = os.path.join( + self.default_export_path, category_name, feature_type + ) + for export_format in export_formats: + export_format_path = os.path.join(file_export_path, export_format.suffix) + os.makedirs(export_format_path, exist_ok=True) + + export_filename = f"""{self.dataset_prefix}_{category_name}_{feature_type}_{export_format.suffix}""" + export_file_path = os.path.join( + export_format_path, f"{export_filename}.{export_format.suffix}" + ) + + if os.path.exists(export_file_path): + os.remove(export_file_path) + + layer_creation_options_str = ( + " ".join( + [f"'{option}'" for option in export_format.layer_creation_options] + ) + if export_format.layer_creation_options + else "" + ) + executable_query = f"""COPY ({query.strip()}) TO '{export_file_path}' WITH (FORMAT {export_format.format_option}, DRIVER '{export_format.driver_name}'{f', LAYER_CREATION_OPTIONS {layer_creation_options_str}' if layer_creation_options_str else ''})""" + self.duck_db_instance.run_query(executable_query.strip(), load_spatial=True) + zip_file_path = os.path.join(file_export_path, f"{export_filename}.zip") + zip_path = self.file_to_zip(export_format_path, zip_file_path) + return zip_path + + def process_category(self, category): + category_name, category_data = list(category.items())[0] + for feature_type in category_data.types: + extract_query = extract_features_duckdb( + self.iso3, category_data.select, feature_type, category_data.where + ) + zip_path = self.query_to_file( + extract_query, category_name, feature_type, category_data.formats + ) + s3_download_url = self.zip_to_s3(zip_path) + return s3_download_url + + def process_hdx_tags(self, params): + table_type = [ + cat_type + for category in params.categories + for cat_type in list(category.values())[0].types + ] + table_names = self.types_to_tables(list(set(table_type))) + + for table in table_names: + create_table = postgres2duckdb_query(self.iso3, self.cid, table) + self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count() * 2 + ) as executor: + futures = { + 
executor.submit(self.process_category, category): category + for category in params.categories + } + + for future in concurrent.futures.as_completed(futures): + category = futures[future] + try: + result = future.result() + print(result, category) + except Exception as e: + logging.error(f"An error occurred for category {category}: {e}") diff --git a/src/config.py b/src/config.py index e0c9872f..a7b4c34f 100644 --- a/src/config.py +++ b/src/config.py @@ -175,6 +175,26 @@ "POLYGON_STATISTICS_API_RATE_LIMIT" ) or config.get("API_CONFIG", "POLYGON_STATISTICS_API_RATE_LIMIT", fallback=5) +ENABLE_HDX_EXPORTS = os.environ.get("ENABLE_HDX_EXPORTS") or config.getboolean( + "HDX", "ENABLE_HDX_EXPORTS", fallback=False +) + +HDX_SITE = os.environ.get("HDX_SITE") or config.getboolean( + "HDX", "HDX_SITE", fallback="demo" +) +HDX_API_KEY = os.environ.get("HDX_API_KEY") or config.getboolean( + "HDX", "HDX_API_KEY", fallback=None +) + +if ENABLE_HDX_EXPORTS: + from hdx.api.configuration import Configuration + + HDX_URL_PREFIX = Configuration.create( + hdx_site=HDX_SITE, + hdx_key=HDX_API_KEY, + user_agent="HDXPythonLibrary/6.2.0-HOTOSM OSM Exports", + ) + def get_db_connection_params() -> dict: """Return a python dict that can be passed to psycopg2 connections diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index b2355a57..eefabb5c 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -806,3 +806,71 @@ def generate_polygon_stats_graphql_query(geojson_feature): query = query % dumps(geojson_feature) return query + + +def get_country_from_iso(iso3): + query = f"""SELECT + b.cid::int as fid, b.description as name, b.dataset_name as dataset_prefix, b.locations as locations + FROM + countries b + WHERE + LOWER(iso_3) = '{iso3}' + """ + return query + + +def postgres2duckdb_query(iso3, cid, table, enable_users_detail=False): + select_query = ( + """osm_id, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" + ) + create_select_duck_db = """osm_id,version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry""" + + if enable_users_detail: + select_query = """osm_id, uid, user, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" + create_select_duck_db = """osm_id, uid, user, version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry""" + + duck_db_create = f"""CREATE TABLE {iso3}_{table} AS SELECT {create_select_duck_db} FROM postgres_query("postgres_db", "SELECT {select_query} FROM {table} WHERE country <@ ARRAY [{cid}]") """ + + return duck_db_create + + +def extract_features_duckdb(iso3, select, feature_type, where): + map_tables = { + "points": {"table": ["nodes"], "where": {"nodes": where}}, + "lines": { + "table": ["ways_line", "relations"], + "where": { + "ways_line": where, + "relations": f"{where} and ST_GeometryType(geometry)='MULTILINESTRING'", + }, + }, + "polygons": { + "table": ["ways_poly", "relations"], + "where": { + "ways_poly": where, + "relations": f"{where} and (ST_GeometryType(geometry)='MULTIPOLYGON' or ST_GeometryType(geometry)='POLYGON')", + }, + }, + } + + select = [f"tags['{item}'][1] as '{item}'" for item in select] + select += ["osm_id", "geometry"] + select_query = ", ".join(select) + + from_query = map_tables[feature_type]["table"] + base_query = [] + for table in from_query: + query = f"""select {select_query} from {f"{iso3}_{table}"} where 
{map_tables[feature_type]['where'][table]}""" + base_query.append(query) + return " UNION ALL ".join(base_query) + + +def get_country_geom_from_iso(iso3): + query = f"""SELECT + ST_AsGeoJSON(geometry) as geom + FROM + countries b + WHERE + LOWER(iso_3) = '{iso3}' + """ + return query diff --git a/src/validation/models.py b/src/validation/models.py index c8cf2fae..a0f857ea 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -252,6 +252,7 @@ class Config: class StatsRequestParams(BaseModel): geometry: Union[Polygon, MultiPolygon] = Field( + default=None, example={ "type": "Polygon", "coordinates": [ @@ -265,13 +266,30 @@ class StatsRequestParams(BaseModel): ], }, ) + so3: str = Field( + default=None, + description="ISO3 Country Code.", + min_length=3, + max_length=3, + example="NPL", + ) - @validator("geometry", allow_reuse=True) - def get_value_as_feature(cls, value): - """Converts geometry to geojson feature""" - feature = { - "type": "Feature", - "geometry": json.loads(value.json()), - "properties": {}, - } - return feature + @validator("geometry", pre=True, always=True) + def set_geometry_or_iso3(cls, value, values): + """Either geometry or iso3 should be supplied.""" + if value is not None and values.get("iso3") is not None: + raise ValueError("Only one of geometry or iso3 should be supplied.") + if value is None and values.get("iso3") is None: + raise ValueError("Either geometry or iso3 should be supplied.") + return value + + @validator("geometry", pre=True, always=True) + def validate_geometry(cls, value): + """Converts geometry to geojson feature.""" + if value is not None: + feature = { + "type": "Feature", + "geometry": json.loads(value.json()), + "properties": {}, + } + return feature From d8acc5e5279d927ce182ac9ad4af91c71742ee4a Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 19 Dec 2023 16:41:10 +0545 Subject: [PATCH 02/20] added hdx python api in requirements --- requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 75a706ce..3162cf60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,4 +37,7 @@ pdocs==1.0.1 sozipfile==0.3.2 ##duckdb -duckdb==0.9.2 \ No newline at end of file +duckdb==0.9.2 + +##hdx +hdx-python-api==6.2.0 \ No newline at end of file From 8d6f61e028a9f0b1976430f81c6d60e17495e468 Mon Sep 17 00:00:00 2001 From: kshtiijrajsharma Date: Tue, 19 Dec 2023 20:30:51 +0545 Subject: [PATCH 03/20] Upgrade python and added first version of hdx export --- API/hdx.py | 3 +- API/raw_data.py | 8 +-- requirements.txt | 43 ++++++----- src/app.py | 152 ++++++++++++++++++++++++++++++--------- src/config.py | 3 +- src/validation/models.py | 8 +-- 6 files changed, 151 insertions(+), 66 deletions(-) diff --git a/API/hdx.py b/API/hdx.py index 3d8703b8..e2853935 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -95,7 +95,6 @@ class DynamicCategoriesModel(BaseModel): max_length=3, example="USA", ) - categories: List[Dict[str, CategoryModel]] = Field( ..., description="List of dynamic categories.", @@ -124,7 +123,7 @@ async def process_data( params: DynamicCategoriesModel = Body( ..., description="Input parameters including ISO3 country code and dynamic categories.", - examples={ + openapi_examples={ "normal": { "summary": "Example: Road extraction set", "description": "Query to extract road in Nepal", diff --git a/API/raw_data.py b/API/raw_data.py index 82669feb..830ccfa0 100644 --- a/API/raw_data.py +++ b/API/raw_data.py @@ -64,7 +64,7 @@ def get_osm_current_snapshot_as_file( 
request: Request, params: RawDataCurrentParams = Body( default={}, - examples={ + openapi_examples={ "normal": { "summary": "Example : Extract Evertyhing in the area", "description": "**Query** to Extract everything in the area , You can pass your geometry only and you will get everything on that area", @@ -450,7 +450,7 @@ def get_osm_current_snapshot_as_file( return JSONResponse({"task_id": task.id, "track_link": f"/tasks/status/{task.id}/"}) -@router.post("/snapshot/plain/", response_model=FeatureCollection) +@router.post("/snapshot/plain/") @version(1) def get_osm_current_snapshot_as_plain_geojson( request: Request, @@ -482,14 +482,14 @@ def get_osm_current_snapshot_as_plain_geojson( return result -@router.get("/countries/", response_model=FeatureCollection) +@router.get("/countries/") @version(1) def get_countries(q: str = ""): result = RawData().get_countries_list(q) return result -@router.get("/osm_id/", response_model=FeatureCollection) +@router.get("/osm_id/") @version(1) def get_osm_feature(osm_id: int): return RawData().get_osm_feature(osm_id) diff --git a/requirements.txt b/requirements.txt index 3162cf60..04d7fb81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,37 +1,34 @@ -aiofiles==0.7.0 -asgiref==3.3.4 -click==8.0.3 -fastapi==0.65.2 -h11==0.12.0 -importlib-metadata==4.5.0 -psycopg2==2.9.1 -pydantic==1.10.2 -starlette==0.14.2 -typing-extensions==4.1.0 -uvicorn==0.14.0 -zipp==3.4.1 -geojson-pydantic==0.3.0 -pytest == 7.3.1 -geojson == 2.5.0 +fastapi==0.105.0 +uvicorn==0.24.0 +psycopg2==2.9.9 +geojson-pydantic==1.0.1 +pytest == 7.4.3 +geojson == 3.1.0 + # Used for new relic monitoring newrelic == 7.2.4.171 sentry-sdk == 1.5.12 +## Third party area==1.1.1 orjson==3.9.10 boto3==1.24.38 fastapi-versioning==0.10.0 redis==4.3.4 celery==5.2.7 -flower==1.2.0 slowapi==0.1.6 -osm-login-python==0.0.2 +osm-login-python==1.0.2 +humanize==4.9.0 +python-slugify==8.0.1 #''' required for generating documentations ''' -mkdocs-material==8.5.11 -mkdocs-jupyter==0.22.0 -neoteroi-mkdocs==0.1.2 -pdocs==1.0.1 +# mkdocs-material==8.5.11 +# mkdocs-jupyter==0.22.0 +# neoteroi-mkdocs==0.1.2 +# pdocs==1.0.1 + +## flower +# flower==1.2.0 ##sozipfile sozipfile==0.3.2 @@ -40,4 +37,6 @@ sozipfile==0.3.2 duckdb==0.9.2 ##hdx -hdx-python-api==6.2.0 \ No newline at end of file +hdx-python-api==6.2.0 + + diff --git a/src/app.py b/src/app.py index 59698508..1b4c5150 100644 --- a/src/app.py +++ b/src/app.py @@ -33,7 +33,6 @@ from json import loads as json_loads import boto3 -import duckdb import humanize import orjson import requests @@ -43,11 +42,13 @@ from geojson import FeatureCollection from psycopg2 import OperationalError, connect from psycopg2.extras import DictCursor +from slugify import slugify from src.config import ( AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME, + ENABLE_HDX_EXPORTS, ENABLE_POLYGON_STATISTICS_ENDPOINTS, ENABLE_TILES, EXPORT_MAX_AREA_SQKM, @@ -83,8 +84,10 @@ database_instance = None import logging as log -# assigning global variable of pooling so that it -# will be accessible from any function within this script +if ENABLE_HDX_EXPORTS: + import duckdb + from hdx.data.dataset import Dataset + global LOCAL_CON_POOL # getting the pool instance which was fireup when API is started @@ -658,7 +661,7 @@ def get_grid_id(geom, cur): @staticmethod def geojson2tiles(geojson_path, tile_path, tile_layer_name): """Responsible for geojson to tiles""" - cmd = """tippecanoe -zg --projection=EPSG:4326 -o {tile_output_path} -l {tile_layer_name} {geojson_input_path} --force""".format( + 
cmd = """tippecanoe -zg --projection=EPSG:4326 -o {tile_output_path} -l {tile_layer_name} --force {geojson_input_path}""".format( tile_output_path=tile_path, tile_layer_name=tile_layer_name, geojson_input_path=geojson_path, @@ -1126,15 +1129,6 @@ def run_query(self, query, attach_pgsql=False, load_spatial=False): con.execute(query) -class HDXUploader: - def __init__(self, dataset_prefix, export_url, category): - self.dataset_prefix = dataset_prefix - self.export_url = export_url - self.category = category - - # def - - class HDX: def __init__(self, ISO3): self.iso3 = ISO3.lower() @@ -1189,21 +1183,24 @@ def format_where_clause(self, where_clause): else: return where_clause - # def s3url_to_hdx(self , url, category): - - def zip_to_s3(self, zip_path): - s3_upload_name = os.path.relpath(zip_path, os.path.join(export_path, self.uuid)) + def zip_to_s3(self, resources): + for resource in resources: + s3_upload_name = os.path.relpath( + resource["zip_path"], os.path.join(export_path, self.uuid) + ) - if not USE_S3_TO_UPLOAD: - raise HTTPException( - status_code=404, detail="S3 Export service is disabled on server" + if not USE_S3_TO_UPLOAD: + raise HTTPException( + status_code=404, detail="S3 Export service is disabled on server" + ) + file_transfer_obj = S3FileTransfer() + download_url = file_transfer_obj.upload( + resource["zip_path"], + str(s3_upload_name), ) - file_transfer_obj = S3FileTransfer() - download_url = file_transfer_obj.upload( - zip_path, - str(s3_upload_name), - ) - return download_url + resource["download_url"] = download_url + + return resources # if ENABLE_POLYGON_STATISTICS_ENDPOINTS: # polygon_stats = PolygonStats(iso3=self.iso3).get_summary_stats() # readme_content += f'{polygon_stats["summary"]["building"]}\n' @@ -1235,6 +1232,7 @@ def query_to_file(self, query, category_name, feature_type, export_formats): file_export_path = os.path.join( self.default_export_path, category_name, feature_type ) + resources = [] for export_format in export_formats: export_format_path = os.path.join(file_export_path, export_format.suffix) os.makedirs(export_format_path, exist_ok=True) @@ -1258,7 +1256,14 @@ def query_to_file(self, query, category_name, feature_type, export_formats): self.duck_db_instance.run_query(executable_query.strip(), load_spatial=True) zip_file_path = os.path.join(file_export_path, f"{export_filename}.zip") zip_path = self.file_to_zip(export_format_path, zip_file_path) - return zip_path + resource = {} + resource["filename"] = f"{export_filename}.zip" + resource["zip_path"] = zip_path + resource["format_suffix"] = export_format.suffix + resource["format_description"] = export_format.driver_name + + resources.append(resource) + return resources def process_category(self, category): category_name, category_data = list(category.items())[0] @@ -1266,11 +1271,11 @@ def process_category(self, category): extract_query = extract_features_duckdb( self.iso3, category_data.select, feature_type, category_data.where ) - zip_path = self.query_to_file( + resources = self.query_to_file( extract_query, category_name, feature_type, category_data.formats ) - s3_download_url = self.zip_to_s3(zip_path) - return s3_download_url + uploaded_resources = self.zip_to_s3(resources) + return uploaded_resources def process_hdx_tags(self, params): table_type = [ @@ -1295,7 +1300,88 @@ def process_hdx_tags(self, params): for future in concurrent.futures.as_completed(futures): category = futures[future] try: - result = future.result() - print(result, category) + uploaded_resources = future.result() + 
print(uploaded_resources, category) + self.resource_to_hdx(uploaded_resources, category) + except Exception as e: - logging.error(f"An error occurred for category {category}: {e}") + raise e + # logging.error(f"An error occurred for category {category}: {e}") + + def resource_to_hdx(self, uploaded_resources, category): + uploader = HDXUploader(category) + uploader.init_dataset( + self.dataset_prefix, self.dataset_name, self.dataset_locations + ) + for resource in uploaded_resources: + uploader.add_resource( + resource["filename"], + resource["format_suffix"], + resource["format_description"], + resource["download_url"], + ) + uploader.upload_dataset() + + +class HDXUploader: + def __init__(self, category): + self.category_name, self.category_data = list(category.items())[0] + self.dataset = None + + def slugify(self, name): + return slugify(name).replace("-", "_") + + def add_resource( + self, resource_name, resource_format, resource_description, export_url + ): + if self.dataset: + resource = { + "name": resource_name, + "format": resource_format, + "description": resource_description, + "url": export_url, + "last_modified": datetime.now().isoformat(), + } + print(resource) + self.dataset.add_update_resource(resource) + + def upload_dataset(self): + if self.dataset: + exists = Dataset.read_from_hdx(self.dataset["name"]) + if exists: + # self.dataset.set_date_of_dataset(datetime.now()) + self.dataset.update_in_hdx() + else: + # self.dataset.set_date_of_dataset(datetime.now()) + self.dataset.create_in_hdx(allow_no_resources=True) + + def init_dataset( + self, + dataset_prefix, + dataset_name, + dataset_locations, + ): + self.dataset = Dataset( + { + "name": "{0}_{1}".format( + dataset_prefix, self.slugify(self.category_name) + ), + "title": "{0} {1} (OpenStreetMap Export)".format( + dataset_name, self.category_name + ), + "owner_org": "225b9f7d-e7cb-4156-96a6-44c9c58d31e3", + "maintainer": "6a0688ce-8521-46e2-8edd-8e26c0851ebd", + "dataset_source": "OpenStreetMap contributors", + "methodology": "Other", + "methodology_other": "Volunteered geographic information", + "license_id": "hdx-odc-odbl", + "updated_by_script": f'Hotosm OSM Exports ({datetime.now().strftime("%Y-%m-%dT%H:%M:%S")}', + "data_update_frequency": -2, + "caveats": self.category_data.hdx.caveats, + ## notes , private and subnational option + } + ) + for location in dataset_locations: + self.dataset.add_country_location(location) + for tag in self.category_data.hdx.tags: + self.dataset.add_tag(tag) diff --git a/src/config.py b/src/config.py index a7b4c34f..ad7efaca 100644 --- a/src/config.py +++ b/src/config.py @@ -182,7 +182,7 @@ HDX_SITE = os.environ.get("HDX_SITE") or config.getboolean( "HDX", "HDX_SITE", fallback="demo" ) -HDX_API_KEY = os.environ.get("HDX_API_KEY") or config.getboolean( +HDX_API_KEY = os.environ.get("HDX_API_KEY") or config.get( "HDX", "HDX_API_KEY", fallback=None ) @@ -194,6 +194,7 @@ hdx_key=HDX_API_KEY, user_agent="HDXPythonLibrary/6.2.0-HOTOSM OSM Exports", ) + print(HDX_URL_PREFIX) def get_db_connection_params() -> dict: diff --git a/src/validation/models.py b/src/validation/models.py index a0f857ea..3c793241 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -44,7 +44,7 @@ def to_camel(string: str) -> str: class BaseModel(PydanticModel): class Config: alias_generator = to_camel - allow_population_by_field_name = True + populate_by_name = True use_enum_values = True # extra = "forbid" @@ -204,7 +204,7 @@ class SnapshotResponse(BaseModel): track_link: str class Config: - 
schema_extra = { + json_schema_extra = { "example": { "task_id": "aa539af6-83d4-4aa3-879e-abf14fffa03f", "track_link": "/tasks/status/aa539af6-83d4-4aa3-879e-abf14fffa03f/", @@ -227,7 +227,7 @@ class SnapshotTaskResponse(BaseModel): result: SnapshotTaskResult class Config: - schema_extra = { + json_schema_extra = { "example": { "id": "3fded368-456f-4ef4-a1b8-c099a7f77ca4", "status": "SUCCESS", @@ -247,7 +247,7 @@ class StatusResponse(BaseModel): last_updated: str class Config: - schema_extra = {"example": {"lastUpdated": "2022-06-27 19:59:24+05:45"}} + json_schema_extra = {"example": {"lastUpdated": "2022-06-27 19:59:24+05:45"}} class StatsRequestParams(BaseModel): From 79ffe0779e037231d7dc6303f085c942d2cd0964 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Wed, 20 Dec 2023 21:30:02 +0545 Subject: [PATCH 04/20] Adds custom polygon support along with all other fields required for hdx upload --- API/api_worker.py | 4 +- API/hdx.py | 153 +++++++++++++++++++++++++++-- API/test.py | 19 ++++ requirements.txt | 3 +- setup.py | 10 +- src/app.py | 180 +++++++++++++++++++++++------------ src/config.py | 55 ++++++++--- src/query_builder/builder.py | 38 +++++++- src/validation/models.py | 4 +- 9 files changed, 370 insertions(+), 96 deletions(-) create mode 100644 API/test.py diff --git a/API/api_worker.py b/API/api_worker.py index 9184ce7c..257a973f 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -105,8 +105,8 @@ def process_raw_data(self, params): readme_content += "Exported through Raw-data-api (https://github.com/hotosm/raw-data-api) using OpenStreetMap data.\n" readme_content += "Learn more about OpenStreetMap and its data usage policy : https://www.openstreetmap.org/about \n" if polygon_stats: - readme_content += f'{polygon_stats["summary"]["building"]}\n' - readme_content += f'{polygon_stats["summary"]["road"]}\n' + readme_content += f'{polygon_stats["summary"]["buildings"]}\n' + readme_content += f'{polygon_stats["summary"]["roads"]}\n' readme_content += "Read about what this summary means: indicators: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md,metrics: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md" zf.writestr("Readme.txt", readme_content) diff --git a/API/hdx.py b/API/hdx.py index e2853935..3fe7e664 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -1,11 +1,13 @@ from enum import Enum -from typing import Dict, List +from typing import Dict, List, Optional, Union from fastapi import APIRouter, Body, Query, Request from fastapi_versioning import version +from geojson_pydantic import MultiPolygon, Polygon from pydantic import BaseModel, Field, validator from src.app import HDX +from src.config import ALLOWED_HDX_TAGS, ALLOWED_HDX_UPDATE_FREQUENCIES from src.config import LIMITER as limiter from src.config import RATE_LIMIT_PER_MIN @@ -19,10 +21,24 @@ class HDXModel(BaseModel): example=["roads", "transportation", "geodata"], ) caveats: str = Field( - ..., - description="Caveats for the HDX model.", + default="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + description="Caveats/Warning for the Datasets.", example="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", ) + notes: str = Field( + default="", + description="Extra notes to append in notes section of hdx datasets", + example="Sample notes to append", + ) + + @validator("tags") + def validate_tags(cls, value): + for item in value: + if item.strip() not in ALLOWED_HDX_TAGS: + raise ValueError( + 
f"Invalid tag {item.strip()} , Should be within {ALLOWED_HDX_TAGS}" + ) + return value class CategoryModel(BaseModel): @@ -87,14 +103,59 @@ def __init__(self, suffix, driver_name, layer_creation_options, format_option): } +class DatasetConfig(BaseModel): + private: bool = Field( + default=False, + description="Make dataset private , By default False , Public is recommended", + example="False", + ) + subnational: bool = Field( + default=False, + description="Make it true if dataset doesn't cover nation/country", + example="False", + ) + update_frequency: str = Field( + default="as needed", + description="Update frequncy to be added on uploads", + example="daily", + ) + dataset_title: str = Field( + default=None, + description="Dataset title which appears at top of the page", + example="Nepal", + ) + dataset_prefix: str = Field( + default=None, + description="Dataset prefix to be appended before category name, Will be ignored if iso3 is supplied", + example="hotosm_npl", + ) + dataset_locations: List[str] = Field( + default=None, + description="Valid dataset locations iso3", + example="['npl']", + ) + + @validator("update_frequency") + def validate_frequency(cls, value): + if value.strip() not in ALLOWED_HDX_UPDATE_FREQUENCIES: + raise ValueError( + f"Invalid update frequency , Should be within {ALLOWED_HDX_UPDATE_FREQUENCIES}" + ) + return value.strip() + + class DynamicCategoriesModel(BaseModel): - iso3: str = Field( - ..., - description="ISO3 Country Code.", + iso3: Optional[str] = Field( + default=None, + description="ISO3 Country Code", min_length=3, max_length=3, example="USA", ) + dataset: Optional[DatasetConfig] = Field( + description="Dataset Configurations for HDX Upload" + ) + categories: List[Dict[str, CategoryModel]] = Field( ..., description="List of dynamic categories.", @@ -113,6 +174,38 @@ class DynamicCategoriesModel(BaseModel): } ], ) + geometry: Optional[Union[Polygon, MultiPolygon]] = Field( + default=None, + example={ + "type": "Polygon", + "coordinates": [ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + }, + ) + + @validator("geometry", pre=True, always=True) + def set_geometry_or_iso3(cls, value, values): + """Either geometry or iso3 should be supplied.""" + if value is not None and values.get("iso3") is not None: + raise ValueError("Only one of geometry or iso3 should be supplied.") + if value is None and values.get("iso3") is None: + raise ValueError("Either geometry or iso3 should be supplied.") + if value is not None: + dataset = values.get("dataset").dict() + if dataset is None: + raise ValueError("Dataset config should be supplied for custom polygon") + + for item in dataset.keys(): + if dataset.get(item) is None: + raise ValueError(f"Missing, Dataset config : {item}") + return value @router.post("/submit/") @@ -124,8 +217,8 @@ async def process_data( ..., description="Input parameters including ISO3 country code and dynamic categories.", openapi_examples={ - "normal": { - "summary": "Example: Road extraction set", + "normal_iso": { + "summary": "Example: Road extraction using iso3", "description": "Query to extract road in Nepal", "value": { "iso3": "NPL", @@ -145,9 +238,47 @@ async def process_data( ], }, }, + "normal_polygon": { + "summary": "Example: Road extraction set using custom polygon", + "description": "Query to extract road in Pokhara, Nepal", + "value": { + "geometry": { 
+ "type": "Polygon", + "coordinates": [ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + }, + "dataset": { + "subnational": True, + "dataset_title": "Pokhara", + "dataset_prefix": "hotosm_pkr", + "dataset_locations": ["npl"], + }, + "categories": [ + { + "Roads": { + "hdx": { + "tags": ["roads", "transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines"], + "select": ["name", "highway"], + "where": "tags['highway'][1] IS NOT NULL", + "formats": ["fgb"], + } + } + ], + }, + }, "fullset": { "summary": "Full HDX Dataset default", - "description": "Full yaml conversion for dataset", + "description": "Full yaml conversion for dataset with iso3 example", "value": { "iso3": "NPL", "categories": [ @@ -435,5 +566,7 @@ async def process_data( Returns: dict: Result message. """ - hdx_set = HDX(params.iso3).process_hdx_tags(params) + if not params.dataset: + params.dataset = DatasetConfig() + hdx_set = HDX(params).process_hdx_tags() return {"message": "Data processed successfully"} diff --git a/API/test.py b/API/test.py new file mode 100644 index 00000000..57d01d72 --- /dev/null +++ b/API/test.py @@ -0,0 +1,19 @@ +import re + + +def replace_key(input_str): + pattern = r"tags\['([^']+)'\]\[1\]" + match = re.search(pattern, input_str) + + if match: + key = match.group(1) + return input_str.replace(match.group(0), key) + else: + return input_str + + +# Example usage: +input_str = "tags['railway'][1] IN ('rail','station')" +result = replace_key(input_str) + +print(result) diff --git a/requirements.txt b/requirements.txt index 04d7fb81..59c11f54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,10 +16,11 @@ boto3==1.24.38 fastapi-versioning==0.10.0 redis==4.3.4 celery==5.2.7 -slowapi==0.1.6 +slowapi==0.1.8 osm-login-python==1.0.2 humanize==4.9.0 python-slugify==8.0.1 +geomet==1.1.0 #''' required for generating documentations ''' # mkdocs-material==8.5.11 diff --git a/setup.py b/setup.py index 4f2cdfe2..cb5655b0 100644 --- a/setup.py +++ b/setup.py @@ -9,14 +9,14 @@ description="The Raw Data API module makes it simple for you to get osm data stats provided by api in your own project", packages=setuptools.find_packages(), install_requires=[ - "pytest == 7.3.1", + "pytest == 7.4.3", "psycopg2", "boto3==1.24.38", - "fastapi==0.65.2", - "geojson == 2.5.0", + "fastapi==0.105.0", + "geojson == 7.4.3", "area==1.1.1", "orjson==3.9.10", - "slowapi==0.1.6", + "slowapi==0.1.8", ], classifiers=[ "Programming Language :: Python :: 3", @@ -24,7 +24,7 @@ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires=">=3.6", + python_requires=">=3.8", long_description=long_description, long_description_content_type="text/markdown", author="Hot Tech Team", diff --git a/src/app.py b/src/app.py index 1b4c5150..f9ed2646 100644 --- a/src/app.py +++ b/src/app.py @@ -54,12 +54,15 @@ EXPORT_MAX_AREA_SQKM, ) from src.config import EXPORT_PATH as export_path +from src.config import HDX_MAINTAINER, HDX_OWNER_ORG from src.config import INDEX_THRESHOLD as index_threshold from src.config import POLYGON_STATISTICS_API_URL from src.config import USE_CONNECTION_POOLING as use_connection_pooling from src.config import USE_S3_TO_UPLOAD, get_db_connection_params, level from src.config import logger as logging from 
src.query_builder.builder import ( + HDX_FILTER_CRITERIA, + HDX_MARKDOWN, check_exisiting_country, check_last_updated_rawdata, extract_features_duckdb, @@ -67,13 +70,10 @@ generate_polygon_stats_graphql_query, get_countries_query, get_country_from_iso, - get_country_geojson, get_country_geom_from_iso, - get_country_id_query, get_osm_feature_query, postgres2duckdb_query, raw_currentdata_extraction_query, - raw_extract_plain_geojson, ) from src.validation.models import RawDataOutputType @@ -939,7 +939,7 @@ def __init__(self, geojson=None, iso3=None): raise HTTPException(status_code=404, detail="Invalid iso3 code") self.INPUT_GEOM = result[0] else: - self.INPUT_GEOM = dumps(geojson) + self.INPUT_GEOM = dumps(json_loads(geojson.json())) @staticmethod def get_building_pattern_statement( @@ -1073,7 +1073,7 @@ def get_summary_stats(self): ) return_stats = { - "summary": {"building": building_summary, "road": road_summary}, + "summary": {"buildings": building_summary, "roads": road_summary}, "raw": { "population": combined_data["population"], "populatedAreaKm2": combined_data["populatedAreaKm2"], @@ -1130,32 +1130,50 @@ def run_query(self, query, attach_pgsql=False, load_spatial=False): class HDX: - def __init__(self, ISO3): - self.iso3 = ISO3.lower() - dbdict = get_db_connection_params() - d_b = Database(dbdict) - con, cur = d_b.connect() - cur.execute(get_country_from_iso(self.iso3)) - result = cur.fetchall()[0] - if not result: - raise HTTPException(status_code=404, detail="Invalid iso3 code") + def __init__(self, params): + self.params = params + self.iso3 = self.params.iso3 + if self.iso3: + self.iso3 = self.iso3.lower() + self.cid = None + if self.iso3: + dbdict = get_db_connection_params() + d_b = Database(dbdict) + con, cur = d_b.connect() + cur.execute(get_country_from_iso(self.iso3)) + result = cur.fetchall()[0] + if not result: + raise HTTPException(status_code=404, detail="Invalid iso3 code") - ( - self.cid, - self.dataset_name, - self.dataset_prefix, - self.dataset_locations, - ) = result + ( + self.cid, + dataset_title, + dataset_prefix, + dataset_locations, + ) = result + + if not self.params.dataset.dataset_title: + self.params.dataset.dataset_title = dataset_title + if not self.params.dataset.dataset_prefix: + self.params.dataset.dataset_prefix = dataset_prefix + if not self.params.dataset.dataset_locations: + self.params.dataset.dataset_locations = dataset_locations self.uuid = str(uuid.uuid4()) self.default_export_path = os.path.join( - export_path, self.uuid, "HDX", self.iso3.upper() + export_path, + self.uuid, + "HDX", + self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix, ) if os.path.exists(self.default_export_path): shutil.rmtree(self.default_export_path) os.makedirs(self.default_export_path) self.duck_db_instance = DuckDB( - os.path.join(self.default_export_path, f"{self.iso3}.db") + os.path.join( + self.default_export_path, + f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db", + ) ) def types_to_tables(self, type_list: list): @@ -1199,13 +1217,8 @@ def zip_to_s3(self, resources): str(s3_upload_name), ) resource["download_url"] = download_url - + os.remove(resource["zip_path"]) return resources - # if ENABLE_POLYGON_STATISTICS_ENDPOINTS: - # polygon_stats = PolygonStats(iso3=self.iso3).get_summary_stats() - # readme_content += f'{polygon_stats["summary"]["building"]}\n' - # readme_content += f'{polygon_stats["summary"]["road"]}\n' - # readme_content += "Read about what this summary means: indicators: 
https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md,metrics: https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md" def file_to_zip(self, working_dir, zip_path): zf = zipfile.ZipFile( @@ -1237,7 +1250,7 @@ def query_to_file(self, query, category_name, feature_type, export_formats): export_format_path = os.path.join(file_export_path, export_format.suffix) os.makedirs(export_format_path, exist_ok=True) - export_filename = f"""{self.dataset_prefix}_{category_name}_{feature_type}_{export_format.suffix}""" + export_filename = f"""{self.params.dataset.dataset_prefix}_{category_name}_{feature_type}_{export_format.suffix}""" export_file_path = os.path.join( export_format_path, f"{export_filename}.{export_format.suffix}" ) @@ -1269,7 +1282,10 @@ def process_category(self, category): category_name, category_data = list(category.items())[0] for feature_type in category_data.types: extract_query = extract_features_duckdb( - self.iso3, category_data.select, feature_type, category_data.where + self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, + category_data.select, + feature_type, + category_data.where, ) resources = self.query_to_file( extract_query, category_name, feature_type, category_data.formats @@ -1277,16 +1293,21 @@ def process_category(self, category): uploaded_resources = self.zip_to_s3(resources) return uploaded_resources - def process_hdx_tags(self, params): + def process_hdx_tags(self): table_type = [ cat_type - for category in params.categories + for category in self.params.categories for cat_type in list(category.values())[0].types ] table_names = self.types_to_tables(list(set(table_type))) for table in table_names: - create_table = postgres2duckdb_query(self.iso3, self.cid, table) + create_table = postgres2duckdb_query( + self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, + table, + self.cid, + self.params.geometry, + ) self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True) with concurrent.futures.ThreadPoolExecutor( @@ -1294,25 +1315,31 @@ def process_hdx_tags(self, params): ) as executor: futures = { executor.submit(self.process_category, category): category - for category in params.categories + for category in self.params.categories } for future in concurrent.futures.as_completed(futures): category = futures[future] try: uploaded_resources = future.result() - print(uploaded_resources, category) - self.resource_to_hdx(uploaded_resources, category) + self.resource_to_hdx( + uploaded_resources, self.params.dataset, category + ) except Exception as e: raise e # logging.error(f"An error occurred for category {category}: {e}") - def resource_to_hdx(self, uploaded_resources, category): - uploader = HDXUploader(category) - uploader.init_dataset( - self.dataset_prefix, self.dataset_name, self.dataset_locations + def resource_to_hdx(self, uploaded_resources, dataset_config, category): + uploader = HDXUploader( + hdx=dataset_config, + category=category, + completeness_metadata={ + "iso3": self.iso3, + "geometry": self.params.geometry, + }, ) + uploader.init_dataset() for resource in uploaded_resources: uploader.add_resource( resource["filename"], @@ -1324,13 +1351,51 @@ def resource_to_hdx(self, uploaded_resources, category): class HDXUploader: - def __init__(self, category): + def __init__(self, category, hdx, completeness_metadata=None): + self.hdx = hdx self.category_name, self.category_data = list(category.items())[0] self.dataset = None + self.completeness_metadata = completeness_metadata + 
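        # Filled lazily in add_notes(): PolygonStats summaries (built from the iso3
        # or geometry in completeness_metadata) are attached only for the
        # buildings and roads categories.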
self.data_completeness_stats = None def slugify(self, name): return slugify(name).replace("-", "_") + def filter_formatter(self, where_str): + pattern = r"tags\['([^']+)'\]\[1\]" + match = re.search(pattern, where_str) + + if match: + key = match.group(1) + return where_str.replace(match.group(0), key) + else: + return where_str + + def add_notes(self): + columns = [] + for key in self.category_data.select: + columns.append( + "- [{0}](http://wiki.openstreetmap.org/wiki/Key:{0})".format(key) + ) + columns = "\n".join(columns) + filter_str = HDX_FILTER_CRITERIA.format( + criteria=self.filter_formatter(self.category_data.where) + ) + if self.category_name.lower() in ["roads", "buildings"]: + if self.data_completeness_stats is None: + if self.completeness_metadata: + self.data_completeness_stats = PolygonStats( + iso3=self.completeness_metadata["iso3"], + geojson=self.completeness_metadata["geometry"], + ).get_summary_stats() + if self.data_completeness_stats: + self.category_data.hdx.notes += f'{self.data_completeness_stats["summary"][self.category_name.lower()]}\n' + self.category_data.hdx.notes += "Read about what this summary means, [indicators](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md) , [metrics](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md)" + + return self.category_data.hdx.notes + HDX_MARKDOWN.format( + columns=columns, filter_str=filter_str + ) + def add_resource( self, resource_name, resource_format, resource_description, export_url ): @@ -1347,40 +1412,35 @@ def add_resource( def upload_dataset(self): if self.dataset: - exists = Dataset.read_from_hdx(self.dataset["name"]) - if exists: - # self.dataset.set_date_of_dataset(datetime.now()) - self.dataset.update_in_hdx() - else: - # self.dataset.set_date_of_dataset(datetime.now()) - self.dataset.create_in_hdx(allow_no_resources=True) - - def init_dataset( - self, - dataset_prefix, - dataset_name, - dataset_locations, - ): + self.dataset.set_reference_period(datetime.now()) + self.dataset.create_in_hdx(allow_no_resources=True) + + def init_dataset(self): + dataset_prefix = self.hdx.dataset_prefix + dataset_title = self.hdx.dataset_title + dataset_locations = self.hdx.dataset_locations self.dataset = Dataset( { "name": "{0}_{1}".format( dataset_prefix, self.slugify(self.category_name) ), "title": "{0} {1} (OpenStreetMap Export)".format( - dataset_name, self.category_name + dataset_title, self.category_name ), - "owner_org": "225b9f7d-e7cb-4156-96a6-44c9c58d31e3", - "maintainer": "6a0688ce-8521-46e2-8edd-8e26c0851ebd", + "owner_org": HDX_OWNER_ORG, + "maintainer": HDX_MAINTAINER, "dataset_source": "OpenStreetMap contributors", "methodology": "Other", "methodology_other": "Volunteered geographic information", "license_id": "hdx-odc-odbl", "updated_by_script": f'Hotosm OSM Exports ({datetime.now().strftime("%Y-%m-%dT%H:%M:%S")}', - "data_update_frequency": -2, "caveats": self.category_data.hdx.caveats, - ## notes , private and subnational option + "private": self.hdx.private, + "notes": self.add_notes(), + "subnational": 1 if self.hdx.subnational else 0, } ) + self.dataset.set_expected_update_frequency(self.hdx.update_frequency) for location in dataset_locations: self.dataset.add_country_location(location) for tag in self.category_data.hdx.tags: diff --git a/src/config.py b/src/config.py index ad7efaca..6cdc6c4c 100644 --- a/src/config.py +++ b/src/config.py @@ -179,22 +179,55 @@ "HDX", "ENABLE_HDX_EXPORTS", fallback=False ) -HDX_SITE = os.environ.get("HDX_SITE") or 
config.getboolean( - "HDX", "HDX_SITE", fallback="demo" -) -HDX_API_KEY = os.environ.get("HDX_API_KEY") or config.get( - "HDX", "HDX_API_KEY", fallback=None -) if ENABLE_HDX_EXPORTS: + HDX_SITE = os.environ.get("HDX_SITE") or config.getboolean( + "HDX", "HDX_SITE", fallback="demo" + ) + HDX_API_KEY = os.environ.get("HDX_API_KEY") or config.get( + "HDX", "HDX_API_KEY", fallback=None + ) + HDX_OWNER_ORG = os.environ.get("HDX_OWNER_ORG") or config.get( + "HDX", "HDX_OWNER_ORG", fallback="225b9f7d-e7cb-4156-96a6-44c9c58d31e3" + ) + HDX_MAINTAINER = os.environ.get("HDX_MAINTAINER") or config.get( + "HDX", "HDX_MAINTAINER", fallback="6a0688ce-8521-46e2-8edd-8e26c0851ebd" + ) from hdx.api.configuration import Configuration - HDX_URL_PREFIX = Configuration.create( - hdx_site=HDX_SITE, - hdx_key=HDX_API_KEY, - user_agent="HDXPythonLibrary/6.2.0-HOTOSM OSM Exports", + try: + HDX_URL_PREFIX = Configuration.create( + hdx_site=HDX_SITE, + hdx_key=HDX_API_KEY, + user_agent="HDXPythonLibrary/6.2.0-HOTOSM OSM Exports", + ) + logging.debug(HDX_URL_PREFIX) + except Exception as e: + logging.error( + f"Error creating HDX configuration: {e}, Disabling the hdx exports feature" + ) + ENABLE_HDX_EXPORTS = False + +if ENABLE_HDX_EXPORTS: + from hdx.data.dataset import Dataset + from hdx.data.vocabulary import Vocabulary + + parse_list = ( + lambda value, delimiter=",": value.split(delimiter) + if isinstance(value, str) + else value or [] + ) + + ALLOWED_HDX_TAGS = parse_list( + os.environ.get("ENABLE_HDX_EXPORTS") + or config.get("HDX", "ALLOWED_HDX_TAGS", fallback=None) + or Vocabulary.approved_tags() + ) + ALLOWED_HDX_UPDATE_FREQUENCIES = parse_list( + os.environ.get("ALLOWED_HDX_UPDATE_FREQUENCIES") + or config.get("HDX", "ALLOWED_HDX_UPDATE_FREQUENCIES", fallback=None) + or Dataset.list_valid_update_frequencies() ) - print(HDX_URL_PREFIX) def get_db_connection_params() -> dict: diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index eefabb5c..18d9642e 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -18,11 +18,31 @@ # """Page Contains Query logic required for application""" import re -from json import dumps +from json import dumps, loads + +from geomet import wkt from src.config import logger as logging from src.validation.models import SupportedFilters, SupportedGeometryFilters +HDX_FILTER_CRITERIA = """ +This theme includes all OpenStreetMap features in this area matching: + +{criteria} +""" +HDX_MARKDOWN = """ +OpenStreetMap exports for use in GIS applications. +{filter_str} +Features may have these attributes: + +{columns} + +This dataset is one of many [OpenStreetMap exports on +HDX](https://data.humdata.org/organization/hot). +See the [Humanitarian OpenStreetMap Team](http://hotosm.org/) website for more +information. 
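For reference, these two templates are what `HDXUploader.add_notes` later interpolates into the dataset notes. A minimal illustrative sketch, assuming the import path introduced in this patch and a made-up Roads-style category:

```python
# Illustrative only: render the notes text that would appear on an HDX dataset
# page. The selected keys and the filter criteria below are made-up examples.
from src.query_builder.builder import HDX_FILTER_CRITERIA, HDX_MARKDOWN

columns = "\n".join(
    "- [{0}](http://wiki.openstreetmap.org/wiki/Key:{0})".format(key)
    for key in ["name", "highway"]
)
filter_str = HDX_FILTER_CRITERIA.format(criteria="highway IS NOT NULL")
notes = HDX_MARKDOWN.format(columns=columns, filter_str=filter_str)
print(notes)
```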
+""" + def get_grid_id_query(geometry_dump): base_query = f"""select @@ -819,7 +839,9 @@ def get_country_from_iso(iso3): return query -def postgres2duckdb_query(iso3, cid, table, enable_users_detail=False): +def postgres2duckdb_query( + base_table_name, table, cid=None, geometry=None, enable_users_detail=False +): select_query = ( """osm_id, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" ) @@ -829,12 +851,18 @@ def postgres2duckdb_query(iso3, cid, table, enable_users_detail=False): select_query = """osm_id, uid, user, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" create_select_duck_db = """osm_id, uid, user, version, changeset, timestamp, cast(tags::json AS map(varchar, varchar)) AS tags, cast(ST_GeomFromWKB(geometry) as GEOMETRY) AS geometry""" - duck_db_create = f"""CREATE TABLE {iso3}_{table} AS SELECT {create_select_duck_db} FROM postgres_query("postgres_db", "SELECT {select_query} FROM {table} WHERE country <@ ARRAY [{cid}]") """ + row_filter_condition = ( + f"""country <@ ARRAY [{cid}]""" + if cid + else f"""ST_within(geom,ST_GeomFromText('{wkt.dumps(loads(geometry.json()))}',4326))""" + ) + + duck_db_create = f"""CREATE TABLE {base_table_name}_{table} AS SELECT {create_select_duck_db} FROM postgres_query("postgres_db", "SELECT {select_query} FROM {table} WHERE {row_filter_condition}") """ return duck_db_create -def extract_features_duckdb(iso3, select, feature_type, where): +def extract_features_duckdb(base_table_name, select, feature_type, where): map_tables = { "points": {"table": ["nodes"], "where": {"nodes": where}}, "lines": { @@ -860,7 +888,7 @@ def extract_features_duckdb(iso3, select, feature_type, where): from_query = map_tables[feature_type]["table"] base_query = [] for table in from_query: - query = f"""select {select_query} from {f"{iso3}_{table}"} where {map_tables[feature_type]['where'][table]}""" + query = f"""select {select_query} from {f"{base_table_name}_{table}"} where {map_tables[feature_type]['where'][table]}""" base_query.append(query) return " UNION ALL ".join(base_query) diff --git a/src/validation/models.py b/src/validation/models.py index 3c793241..b7ce0a19 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -251,7 +251,7 @@ class Config: class StatsRequestParams(BaseModel): - geometry: Union[Polygon, MultiPolygon] = Field( + geometry: Optional[Union[Polygon, MultiPolygon]] = Field( default=None, example={ "type": "Polygon", @@ -266,7 +266,7 @@ class StatsRequestParams(BaseModel): ], }, ) - so3: str = Field( + iso3: Optional[str] = Field( default=None, description="ISO3 Country Code.", min_length=3, From 2f5b15a1a9bae001b874fd6df12097201918174c Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 00:38:13 +0545 Subject: [PATCH 05/20] Remove test py and added hdx supported file formats --- API/hdx.py | 83 ++++++++++++++++++++++-------------- API/test.py | 19 --------- src/app.py | 81 ++++++++++++++++++----------------- src/query_builder/builder.py | 2 +- 4 files changed, 95 insertions(+), 90 deletions(-) delete mode 100644 API/test.py diff --git a/API/hdx.py b/API/hdx.py index 3fe7e664..0eb9491a 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -61,7 +61,7 @@ class CategoryModel(BaseModel): formats: List[str] = Field( ..., description="List of Export Formats (suffixes).", - example=["gpkg", "fgb"], + example=["gpkg", "geojson"], ) @validator("types") @@ -95,9 +95,9 @@ def __init__(self, suffix, driver_name, layer_creation_options, format_option): "shp": ExportTypeInfo("shp", "ESRI 
Shapefile", [], "GDAL"), "gpkg": ExportTypeInfo("gpkg", "GeoPackage", [], "GDAL"), "sqlite": ExportTypeInfo("sqlite", "SQLite", [], "GDAL"), - "fgb": ExportTypeInfo("fgb", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), + "geojson": ExportTypeInfo("geojson", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), - "kl": ExportTypeInfo("kml", "KML", [], "GDAL"), + "kml": ExportTypeInfo("kml", "KML", [], "GDAL"), "gpx": ExportTypeInfo("gpx", "GPX", [], "GDAL"), "parquet": ExportTypeInfo("parquet", "PARQUET", [], "PARQUET"), } @@ -153,7 +153,7 @@ class DynamicCategoriesModel(BaseModel): example="USA", ) dataset: Optional[DatasetConfig] = Field( - description="Dataset Configurations for HDX Upload" + default=None, description="Dataset Configurations for HDX Upload" ) categories: List[Dict[str, CategoryModel]] = Field( @@ -169,7 +169,7 @@ class DynamicCategoriesModel(BaseModel): "types": ["lines", "polygons"], "select": ["name", "highway"], "where": "highway IS NOT NULL", - "formats": ["fgb"], + "formats": ["geojson"], } } ], @@ -231,8 +231,29 @@ async def process_data( }, "types": ["lines"], "select": ["name", "highway"], - "where": "tags['highway'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['highway'] IS NOT NULL", + "formats": ["geojson"], + } + } + ], + }, + }, + "normal_iso_multiple_format": { + "summary": "Example: Road extraction using iso3 Multiple format", + "description": "Query to extract road in Nepal Multiple format", + "value": { + "iso3": "NPL", + "categories": [ + { + "Roads": { + "hdx": { + "tags": ["roads", "transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines"], + "select": ["name", "highway"], + "where": "tags['highway'] IS NOT NULL", + "formats": ["geojson", "gpkg", "kml", "shp"], } } ], @@ -269,8 +290,8 @@ async def process_data( }, "types": ["lines"], "select": ["name", "highway"], - "where": "tags['highway'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['highway'] IS NOT NULL", + "formats": ["geojson"], } } ], @@ -304,8 +325,8 @@ async def process_data( "office", "source", ], - "where": "tags['building'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['building'] IS NOT NULL", + "formats": ["geojson"], } }, { @@ -327,8 +348,8 @@ async def process_data( "layer", "source", ], - "where": "tags['highway'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['highway'] IS NOT NULL", + "formats": ["geojson"], } }, { @@ -351,8 +372,8 @@ async def process_data( "water", "source", ], - "where": "tags['waterway'][1] IS NOT NULL OR tags['water'][1] IS NOT NULL OR tags['natural'][1] IN ('water','wetland','bay')", - "formats": ["fgb"], + "where": "tags['waterway'] IS NOT NULL OR tags['water'] IS NOT NULL OR tags['natural'] IN ('water','wetland','bay')", + "formats": ["geojson"], } }, { @@ -381,8 +402,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['amenity'][1] IS NOT NULL OR tags['man_made'][1] IS NOT NULL OR tags['shop'][1] IS NOT NULL OR tags['tourism'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['amenity'] IS NOT NULL OR tags['man_made'] IS NOT NULL OR tags['shop'] IS NOT NULL OR tags['tourism'] IS NOT NULL", + "formats": ["geojson"], } }, { @@ -408,8 +429,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['aeroway'][1] IS NOT NULL OR tags['building'][1] = 'aerodrome' OR tags['emergency:helipad'][1] IS NOT NULL OR tags['emergency'][1] = 'landing_site'", - 
"formats": ["fgb"], + "where": "tags['aeroway'] IS NOT NULL OR tags['building'] = 'aerodrome' OR tags['emergency:helipad'] IS NOT NULL OR tags['emergency'] = 'landing_site'", + "formats": ["geojson"], } }, { @@ -432,8 +453,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['amenity'][1] = 'ferry_terminal' OR tags['building'][1] = 'ferry_terminal' OR tags['port'][1] IS NOT NULL", - "formats": ["fgb"], + "where": "tags['amenity'] = 'ferry_terminal' OR tags['building'] = 'ferry_terminal' OR tags['port'] IS NOT NULL", + "formats": ["geojson"], } }, { @@ -456,8 +477,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['amenity'][1] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')", - "formats": ["fgb"], + "where": "tags['amenity'] IN ('kindergarten', 'school', 'college', 'university') OR building IN ('kindergarten', 'school', 'college', 'university')", + "formats": ["geojson"], } }, { @@ -479,8 +500,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['healthcare'][1] IS NOT NULL OR tags['amenity'][1] IN ('doctors', 'dentist', 'clinic', 'hospital', 'pharmacy')", - "formats": ["fgb"], + "where": "tags['healthcare'] IS NOT NULL OR tags['amenity'] IN ('doctors', 'dentist', 'clinic', 'hospital', 'pharmacy')", + "formats": ["geojson"], } }, { @@ -500,8 +521,8 @@ async def process_data( "is_in", "source", ], - "where": "tags['place'][1] IN ('isolated_dwelling', 'town', 'village', 'hamlet', 'city')", - "formats": ["fgb"], + "where": "tags['place'] IN ('isolated_dwelling', 'town', 'village', 'hamlet', 'city')", + "formats": ["geojson"], } }, { @@ -520,8 +541,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['amenity'][1] IN ('mobile_money_agent','bureau_de_change','bank','microfinance','atm','sacco','money_transfer','post_office')", - "formats": ["fgb"], + "where": "tags['amenity'] IN ('mobile_money_agent','bureau_de_change','bank','microfinance','atm','sacco','money_transfer','post_office')", + "formats": ["geojson"], } }, { @@ -546,8 +567,8 @@ async def process_data( "addr:city", "source", ], - "where": "tags['railway'][1] IN ('rail','station')", - "formats": ["fgb"], + "where": "tags['railway'] IN ('rail','station')", + "formats": ["geojson"], } }, ], diff --git a/API/test.py b/API/test.py deleted file mode 100644 index 57d01d72..00000000 --- a/API/test.py +++ /dev/null @@ -1,19 +0,0 @@ -import re - - -def replace_key(input_str): - pattern = r"tags\['([^']+)'\]\[1\]" - match = re.search(pattern, input_str) - - if match: - key = match.group(1) - return input_str.replace(match.group(0), key) - else: - return input_str - - -# Example usage: -input_str = "tags['railway'][1] IN ('rail','station')" -result = replace_key(input_str) - -print(result) diff --git a/src/app.py b/src/app.py index f9ed2646..95988899 100644 --- a/src/app.py +++ b/src/app.py @@ -1133,6 +1133,7 @@ class HDX: def __init__(self, params): self.params = params self.iso3 = self.params.iso3 + self.HDX_SUPPORTED_FORMATS = ["geojson", "gpkg", "kml", "shp"] if self.iso3: self.iso3 = self.iso3.lower() self.cid = None @@ -1192,12 +1193,13 @@ def types_to_tables(self, type_list: list): return list(table_set) def format_where_clause(self, where_clause): - pattern = r"tags\['([^']+)'\]\[1\]" + pattern = r"tags\['([^']+)'\]" match = re.search(pattern, where_clause) if match: key = match.group(1) - return where_clause.replace(match.group(0), key) + replacement = f"tags['{key}'][1]" + return 
re.sub(pattern, replacement, where_clause) else: return where_clause @@ -1285,7 +1287,7 @@ def process_category(self, category): self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, category_data.select, feature_type, - category_data.where, + self.format_where_clause(category_data.where), ) resources = self.query_to_file( extract_query, category_name, feature_type, category_data.formats @@ -1293,6 +1295,33 @@ def process_category(self, category): uploaded_resources = self.zip_to_s3(resources) return uploaded_resources + def resource_to_hdx(self, uploaded_resources, dataset_config, category): + if any( + map( + lambda v: v["format_suffix"] in uploaded_resources, + self.HDX_SUPPORTED_FORMATS, + ) + ): + uploader = HDXUploader( + hdx=dataset_config, + category=category, + default_category_path=self.default_export_path, + completeness_metadata={ + "iso3": self.iso3, + "geometry": self.params.geometry, + }, + ) + uploader.init_dataset() + for resource in uploaded_resources: + if resource["format_suffix"] in self.HDX_SUPPORTED_FORMATS: + uploader.add_resource( + resource["filename"], + resource["format_suffix"], + resource["format_description"], + resource["download_url"], + ) + uploader.upload_dataset() + def process_hdx_tags(self): table_type = [ cat_type @@ -1327,33 +1356,17 @@ def process_hdx_tags(self): ) except Exception as e: - raise e - # logging.error(f"An error occurred for category {category}: {e}") - - def resource_to_hdx(self, uploaded_resources, dataset_config, category): - uploader = HDXUploader( - hdx=dataset_config, - category=category, - completeness_metadata={ - "iso3": self.iso3, - "geometry": self.params.geometry, - }, - ) - uploader.init_dataset() - for resource in uploaded_resources: - uploader.add_resource( - resource["filename"], - resource["format_suffix"], - resource["format_description"], - resource["download_url"], - ) - uploader.upload_dataset() + # raise e + logging.error(f"An error occurred for category {category}: {e}") class HDXUploader: - def __init__(self, category, hdx, completeness_metadata=None): + def __init__( + self, category, hdx, default_category_path, completeness_metadata=None + ): self.hdx = hdx self.category_name, self.category_data = list(category.items())[0] + self.category_path = os.path.join(default_category_path, self.category_name) self.dataset = None self.completeness_metadata = completeness_metadata self.data_completeness_stats = None @@ -1361,16 +1374,6 @@ def __init__(self, category, hdx, completeness_metadata=None): def slugify(self, name): return slugify(name).replace("-", "_") - def filter_formatter(self, where_str): - pattern = r"tags\['([^']+)'\]\[1\]" - match = re.search(pattern, where_str) - - if match: - key = match.group(1) - return where_str.replace(match.group(0), key) - else: - return where_str - def add_notes(self): columns = [] for key in self.category_data.select: @@ -1378,9 +1381,7 @@ def add_notes(self): "- [{0}](http://wiki.openstreetmap.org/wiki/Key:{0})".format(key) ) columns = "\n".join(columns) - filter_str = HDX_FILTER_CRITERIA.format( - criteria=self.filter_formatter(self.category_data.where) - ) + filter_str = HDX_FILTER_CRITERIA.format(criteria=self.category_data.where) if self.category_name.lower() in ["roads", "buildings"]: if self.data_completeness_stats is None: if self.completeness_metadata: @@ -1407,11 +1408,13 @@ def add_resource( "url": export_url, "last_modified": datetime.now().isoformat(), } - print(resource) self.dataset.add_update_resource(resource) def upload_dataset(self): if 
self.dataset: + self.dataset.save_to_json( + os.path.join(self.category_path, f"{self.dataset['name']}.json") + ) self.dataset.set_reference_period(datetime.now()) self.dataset.create_in_hdx(allow_no_resources=True) diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index 18d9642e..e8b3971f 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -26,7 +26,7 @@ from src.validation.models import SupportedFilters, SupportedGeometryFilters HDX_FILTER_CRITERIA = """ -This theme includes all OpenStreetMap features in this area matching: +This theme includes all OpenStreetMap features in this area matching (learn what tags means [here](https://wiki.openstreetmap.org/wiki/Tags)): {criteria} """ From 9fa50193ae3d8cd79e35a6fb135f2477901dee82 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 00:47:15 +0545 Subject: [PATCH 06/20] Added multiple format support for hdx --- API/hdx.py | 2 +- src/app.py | 8 +++----- src/query_builder/builder.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/API/hdx.py b/API/hdx.py index 0eb9491a..aa55761a 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -93,7 +93,7 @@ def __init__(self, suffix, driver_name, layer_creation_options, format_option): EXPORT_TYPE_MAPPING = { "geojson": ExportTypeInfo("geojson", "GeoJSON", [], "GDAL"), "shp": ExportTypeInfo("shp", "ESRI Shapefile", [], "GDAL"), - "gpkg": ExportTypeInfo("gpkg", "GeoPackage", [], "GDAL"), + "gpkg": ExportTypeInfo("gpkg", "GPKG", [], "GDAL"), "sqlite": ExportTypeInfo("sqlite", "SQLite", [], "GDAL"), "geojson": ExportTypeInfo("geojson", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), diff --git a/src/app.py b/src/app.py index 95988899..d11a8609 100644 --- a/src/app.py +++ b/src/app.py @@ -1297,10 +1297,8 @@ def process_category(self, category): def resource_to_hdx(self, uploaded_resources, dataset_config, category): if any( - map( - lambda v: v["format_suffix"] in uploaded_resources, - self.HDX_SUPPORTED_FORMATS, - ) + item["format_suffix"] in self.HDX_SUPPORTED_FORMATS + for item in uploaded_resources ): uploader = HDXUploader( hdx=dataset_config, @@ -1356,7 +1354,7 @@ def process_hdx_tags(self): ) except Exception as e: - # raise e + raise e logging.error(f"An error occurred for category {category}: {e}") diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index e8b3971f..db0c18b9 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -26,7 +26,7 @@ from src.validation.models import SupportedFilters, SupportedGeometryFilters HDX_FILTER_CRITERIA = """ -This theme includes all OpenStreetMap features in this area matching (learn what tags means [here](https://wiki.openstreetmap.org/wiki/Tags)): +This theme includes all OpenStreetMap features in this area matching ( Learn what tags means [here](https://wiki.openstreetmap.org/wiki/Tags) ) : {criteria} """ From 6c6f2be099dc810903a9bc56a071d108a3d78559 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 00:49:29 +0545 Subject: [PATCH 07/20] updated typo --- API/hdx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/API/hdx.py b/API/hdx.py index aa55761a..3d9e7bb1 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -95,7 +95,7 @@ def __init__(self, suffix, driver_name, layer_creation_options, format_option): "shp": ExportTypeInfo("shp", "ESRI Shapefile", [], "GDAL"), "gpkg": ExportTypeInfo("gpkg", "GPKG", [], "GDAL"), "sqlite": ExportTypeInfo("sqlite", "SQLite", [], 
"GDAL"), - "geojson": ExportTypeInfo("geojson", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), + "fgb": ExportTypeInfo("fgb", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), "kml": ExportTypeInfo("kml", "KML", [], "GDAL"), "gpx": ExportTypeInfo("gpx", "GPX", [], "GDAL"), From 60b66beb0802cbef763036bafc0e15417aefc073 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 19:41:33 +0545 Subject: [PATCH 08/20] Added configurations , documentation and moved api logic to queue --- API/api_worker.py | 16 +- API/hdx.py | 220 ++------------------- API/main.py | 5 +- API/raw_data.py | 3 +- API/stats.py | 48 ++++- README.md | 6 +- docs/src/installation/configurations.md | 15 ++ src/app.py | 247 ++++++++++++++++++------ src/config.py | 2 +- src/validation/models.py | 221 +++++++++++++++++++-- 10 files changed, 491 insertions(+), 292 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index 257a973f..5dbf4d40 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -11,7 +11,7 @@ import sozipfile.sozipfile as zipfile from celery import Celery -from src.app import PolygonStats, RawData, S3FileTransfer +from src.app import HDX, PolygonStats, RawData, S3FileTransfer from src.config import ALLOW_BIND_ZIP_FILTER from src.config import CELERY_BROKER_URL as celery_broker_uri from src.config import CELERY_RESULT_BACKEND as celery_backend @@ -19,7 +19,7 @@ from src.config import USE_S3_TO_UPLOAD as use_s3_to_upload from src.config import logger as logging from src.query_builder.builder import format_file_name_str -from src.validation.models import RawDataOutputType +from src.validation.models import DatasetConfig, RawDataOutputType celery = Celery("Raw Data API") celery.conf.broker_url = celery_broker_uri @@ -186,6 +186,18 @@ def process_raw_data(self, params): raise ex +@celery.task(bind=True, name="process_hdx_request") +def process_hdx_request(self, params): + if not params.dataset: + params.dataset = DatasetConfig() + hdx_object = HDX(params) + try: + return hdx_object.process_hdx_tags() + except Exception as ex: + hdx_object.clean_resources() + raise ex + + def remove_file(path: str) -> None: """Used for removing temp file dir and its all content after zip file is delivered to user""" try: diff --git a/API/hdx.py b/API/hdx.py index 3d9e7bb1..4618fbe3 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -1,217 +1,20 @@ -from enum import Enum -from typing import Dict, List, Optional, Union - -from fastapi import APIRouter, Body, Query, Request +from fastapi import APIRouter, Body, Request +from fastapi.responses import JSONResponse from fastapi_versioning import version -from geojson_pydantic import MultiPolygon, Polygon -from pydantic import BaseModel, Field, validator -from src.app import HDX -from src.config import ALLOWED_HDX_TAGS, ALLOWED_HDX_UPDATE_FREQUENCIES from src.config import LIMITER as limiter from src.config import RATE_LIMIT_PER_MIN +from src.validation.models import DynamicCategoriesModel -router = APIRouter(prefix="/hdx", tags=["HDX"]) - - -class HDXModel(BaseModel): - tags: List[str] = Field( - ..., - description="List of tags for the HDX model.", - example=["roads", "transportation", "geodata"], - ) - caveats: str = Field( - default="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", - description="Caveats/Warning for the Datasets.", - example="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", - ) - notes: str = Field( - default="", - description="Extra notes to 
append in notes section of hdx datasets", - example="Sample notes to append", - ) - - @validator("tags") - def validate_tags(cls, value): - for item in value: - if item.strip() not in ALLOWED_HDX_TAGS: - raise ValueError( - f"Invalid tag {item.strip()} , Should be within {ALLOWED_HDX_TAGS}" - ) - return value - - -class CategoryModel(BaseModel): - hdx: HDXModel - types: List[str] = Field( - ..., - description="List of feature types (points, lines, polygons).", - example=["lines"], - ) - select: List[str] = Field( - ..., - description="List of selected fields.", - example=["name", "highway"], - ) - where: str = Field( - ..., - description="SQL-like condition to filter features.", - example="highway IS NOT NULL", - ) - formats: List[str] = Field( - ..., - description="List of Export Formats (suffixes).", - example=["gpkg", "geojson"], - ) - - @validator("types") - def validate_types(cls, value): - allowed_types = {"points", "lines", "polygons"} - for item in value: - if item not in allowed_types: - raise ValueError( - f"Invalid type: {item}. Allowed types are {', '.join(allowed_types)}" - ) - return value - - @validator("formats") - def validate_export_types(cls, value): - for export_type in value: - if export_type not in EXPORT_TYPE_MAPPING: - raise ValueError(f"Unsupported export type: {export_type}") - return [EXPORT_TYPE_MAPPING[export_type] for export_type in value] - - -class ExportTypeInfo: - def __init__(self, suffix, driver_name, layer_creation_options, format_option): - self.suffix = suffix - self.driver_name = driver_name - self.layer_creation_options = layer_creation_options - self.format_option = format_option - +from .api_worker import process_hdx_request -EXPORT_TYPE_MAPPING = { - "geojson": ExportTypeInfo("geojson", "GeoJSON", [], "GDAL"), - "shp": ExportTypeInfo("shp", "ESRI Shapefile", [], "GDAL"), - "gpkg": ExportTypeInfo("gpkg", "GPKG", [], "GDAL"), - "sqlite": ExportTypeInfo("sqlite", "SQLite", [], "GDAL"), - "fgb": ExportTypeInfo("fgb", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), - "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), - "kml": ExportTypeInfo("kml", "KML", [], "GDAL"), - "gpx": ExportTypeInfo("gpx", "GPX", [], "GDAL"), - "parquet": ExportTypeInfo("parquet", "PARQUET", [], "PARQUET"), -} - - -class DatasetConfig(BaseModel): - private: bool = Field( - default=False, - description="Make dataset private , By default False , Public is recommended", - example="False", - ) - subnational: bool = Field( - default=False, - description="Make it true if dataset doesn't cover nation/country", - example="False", - ) - update_frequency: str = Field( - default="as needed", - description="Update frequncy to be added on uploads", - example="daily", - ) - dataset_title: str = Field( - default=None, - description="Dataset title which appears at top of the page", - example="Nepal", - ) - dataset_prefix: str = Field( - default=None, - description="Dataset prefix to be appended before category name, Will be ignored if iso3 is supplied", - example="hotosm_npl", - ) - dataset_locations: List[str] = Field( - default=None, - description="Valid dataset locations iso3", - example="['npl']", - ) - - @validator("update_frequency") - def validate_frequency(cls, value): - if value.strip() not in ALLOWED_HDX_UPDATE_FREQUENCIES: - raise ValueError( - f"Invalid update frequency , Should be within {ALLOWED_HDX_UPDATE_FREQUENCIES}" - ) - return value.strip() - - -class DynamicCategoriesModel(BaseModel): - iso3: Optional[str] = Field( - default=None, - description="ISO3 Country Code", - 
min_length=3, - max_length=3, - example="USA", - ) - dataset: Optional[DatasetConfig] = Field( - default=None, description="Dataset Configurations for HDX Upload" - ) - - categories: List[Dict[str, CategoryModel]] = Field( - ..., - description="List of dynamic categories.", - example=[ - { - "Roads": { - "hdx": { - "tags": ["roads", "transportation", "geodata"], - "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", - }, - "types": ["lines", "polygons"], - "select": ["name", "highway"], - "where": "highway IS NOT NULL", - "formats": ["geojson"], - } - } - ], - ) - geometry: Optional[Union[Polygon, MultiPolygon]] = Field( - default=None, - example={ - "type": "Polygon", - "coordinates": [ - [ - [83.96919250488281, 28.194446860487773], - [83.99751663208006, 28.194446860487773], - [83.99751663208006, 28.214869548073377], - [83.96919250488281, 28.214869548073377], - [83.96919250488281, 28.194446860487773], - ] - ], - }, - ) - - @validator("geometry", pre=True, always=True) - def set_geometry_or_iso3(cls, value, values): - """Either geometry or iso3 should be supplied.""" - if value is not None and values.get("iso3") is not None: - raise ValueError("Only one of geometry or iso3 should be supplied.") - if value is None and values.get("iso3") is None: - raise ValueError("Either geometry or iso3 should be supplied.") - if value is not None: - dataset = values.get("dataset").dict() - if dataset is None: - raise ValueError("Dataset config should be supplied for custom polygon") - - for item in dataset.keys(): - if dataset.get(item) is None: - raise ValueError(f"Missing, Dataset config : {item}") - return value +router = APIRouter(prefix="/hdx", tags=["HDX"]) @router.post("/submit/") @limiter.limit(f"{RATE_LIMIT_PER_MIN}/minute") @version(1) -async def process_data( +async def process_hdx_requests( request: Request, params: DynamicCategoriesModel = Body( ..., @@ -578,7 +381,7 @@ async def process_data( ), ): """ - Process data based on dynamic categories. + Process data based on dynamic categories, Fully flexible on filtering and select Args: request: FastAPI Request object. @@ -587,7 +390,8 @@ async def process_data( Returns: dict: Result message. 
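A client-side sketch of this queued flow, under a few assumptions: the service runs locally, the versioned mount point is `/v1`, and the task-status endpoint reports a Celery-style `status` field. The payload mirrors the `normal_iso` example above.

```python
# Hypothetical client for the queued HDX export flow. BASE_URL, the /v1 prefix
# and the status-field name are assumptions, not guaranteed by this patch.
import time

import requests

BASE_URL = "http://localhost:8000/v1"

payload = {
    "iso3": "NPL",
    "categories": [
        {
            "Roads": {
                "hdx": {
                    "tags": ["roads", "transportation", "geodata"],
                    "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive",
                },
                "types": ["lines"],
                "select": ["name", "highway"],
                "where": "tags['highway'] IS NOT NULL",
                "formats": ["geojson"],
            }
        }
    ],
}

resp = requests.post(f"{BASE_URL}/hdx/submit/", json=payload, timeout=30)
resp.raise_for_status()
task = resp.json()  # {"task_id": "...", "track_link": "/tasks/status/<id>/"}

while True:
    status = requests.get(f"{BASE_URL}{task['track_link']}", timeout=30).json()
    # Celery-backed status endpoints typically report PENDING/STARTED/SUCCESS/FAILURE.
    if status.get("status") in ("SUCCESS", "FAILURE"):
        break
    time.sleep(10)
print(status)
```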
""" - if not params.dataset: - params.dataset = DatasetConfig() - hdx_set = HDX(params).process_hdx_tags() - return {"message": "Data processed successfully"} + queue_name = "raw_special" + task = process_hdx_request.apply_async( + args=(params,), queue=queue_name, track_started=True + ) + return JSONResponse({"task_id": task.id, "track_link": f"/tasks/status/{task.id}/"}) diff --git a/API/main.py b/API/main.py index c72ec28b..87edae7c 100644 --- a/API/main.py +++ b/API/main.py @@ -27,6 +27,7 @@ from slowapi.errors import RateLimitExceeded from src.config import ( + ENABLE_HDX_EXPORTS, ENABLE_POLYGON_STATISTICS_ENDPOINTS, EXPORT_PATH, LIMITER, @@ -67,7 +68,9 @@ app.include_router(auth_router) app.include_router(raw_data_router) app.include_router(tasks_router) -app.include_router(hdx_router) + +if ENABLE_HDX_EXPORTS: + app.include_router(hdx_router) if ENABLE_POLYGON_STATISTICS_ENDPOINTS: app.include_router(stats_router) diff --git a/API/raw_data.py b/API/raw_data.py index 830ccfa0..b9f010e0 100644 --- a/API/raw_data.py +++ b/API/raw_data.py @@ -443,7 +443,8 @@ def get_osm_current_snapshot_as_file( ], ) - queue_name = "recurring_queue" if not params.uuid else "raw_default" + # queue_name = "raw_special" if not params.uuid else "raw_default" + queue_name = "raw_default" # Everything directs to default now task = process_raw_data.apply_async( args=(params,), queue=queue_name, track_started=True ) diff --git a/API/stats.py b/API/stats.py index 302bc163..85c3d045 100644 --- a/API/stats.py +++ b/API/stats.py @@ -1,4 +1,6 @@ -from fastapi import APIRouter, Request +import json + +from fastapi import APIRouter, Body, Request from fastapi_versioning import version from src.app import PolygonStats @@ -12,7 +14,38 @@ @router.post("/polygon/") @limiter.limit(f"{POLYGON_STATISTICS_API_RATE_LIMIT}/minute") @version(1) -async def get_polygon_stats(request: Request, params: StatsRequestParams): +async def get_polygon_stats( + request: Request, + params: StatsRequestParams = Body( + ..., + description="Get Summary and raw stats related to polygon", + openapi_examples={ + "normal_polygon": { + "summary": "Normal Example of requesting stats", + "description": "Query to extract stats using Custom Polygon", + "value": { + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + } + }, + }, + "normal_iso": { + "summary": "Query to extract stats using iso", + "description": "Extract stats using iso3 only, For eg : for Nepal", + "value": {"iso3": "npl"}, + }, + }, + ), +): """Get statistics for the specified polygon. Args: @@ -22,6 +55,15 @@ async def get_polygon_stats(request: Request, params: StatsRequestParams): Returns: dict: A dictionary containing statistics for the specified polygon. 
""" - generator = PolygonStats(params.geometry, params.iso3) + feature = None + if params.geometry: + feature = { + "type": "Feature", + "geometry": json.loads(params.geometry.json()), + "properties": {}, + } + if params.iso3: + params.iso3 = params.iso3.lower() + generator = PolygonStats(feature, params.iso3) return generator.get_summary_stats() diff --git a/README.md b/README.md index 1cf78f3c..f3179f43 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ uvicorn API.main:app --reload ### Queues Currently there are two type of queue implemented : -- "recurring_queue" : Queue for recurring exports which will replace the previous exports if present on the system , can be enabled through uuid:false API Param +- "raw_special" : Queue for recurring exports which will replace the previous exports if present on the system , can be enabled through uuid:false API Param - "raw_default" : Queue for default exports which will create each unique id for exports ### Start Celery Worker @@ -119,7 +119,7 @@ You should be able to start [celery](https://docs.celeryq.dev/en/stable/getting- ``` - Start for recurring queue ``` - celery --app API.api_worker worker --loglevel=INFO --queues="recurring_queue" -n 'recurring_worker' + celery --app API.api_worker worker --loglevel=INFO --queues="raw_special" -n 'recurring_worker' ``` Set no of request that a worker can take at a time by using --concurrency @@ -129,7 +129,7 @@ Set no of request that a worker can take at a time by using --concurrency Raw Data API uses flower for monitoring the Celery distributed queue. Run this command on a different shell , if you are running redis on same machine your broker could be `redis://localhost:6379//`. ``` -celery --broker=redis://redis:6379// --app API.api_worker flower --port=5000 --queues="recurring_queue,raw_default" +celery --broker=redis://redis:6379// --app API.api_worker flower --port=5000 --queues="raw_special,raw_default" ``` ### Navigate to the docs to view Raw Data API endpoints diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md index 4f5baeaa..0620839c 100644 --- a/docs/src/installation/configurations.md +++ b/docs/src/installation/configurations.md @@ -31,6 +31,7 @@ The following sections are recognised. - `[API_CONFIG]` - API service related configuration. Required. - `[EXPORT_UPLOAD]` - For external file hosts like S3. Optional. - `[SENTRY]` - Sentry monitoring configuration. Optional. +- `[HDX]` - HDX Exports related configuration. Optional. The following are the different configuration options that are accepted. @@ -66,6 +67,12 @@ The following are the different configuration options that are accepted. 
| `AWS_SECRET_ACCESS_KEY` | `AWS_SECRET_ACCESS_KEY` | `[EXPORT_UPLOAD]` | _none_ | AWS Secret Access Key for S3 access | CONDITIONAL | | `SENTRY_DSN` | `SENTRY_DSN` | `[SENTRY]` | _none_ | Sentry Data Source Name | OPTIONAL | | `SENTRY_RATE` | `SENTRY_RATE` | `[SENTRY]` | `1.0` | Sample rate percentage for shipping errors to sentry; Allowed values between 0 (0%) to 1 (100%)| OPTIONAL | +| `ENABLE_HDX_EXPORTS` | `ENABLE_HDX_EXPORTS` | `[HDX]` | False | Enables hdx related endpoints and imports | OPTIONAL | +| `HDX_SITE` | `HDX_SITE` | `[HDX]` | 'demo' | HDX site to point , By default demo site , use prod for production | CONDITIONAL | +| `HDX_API_KEY` | `HDX_API_KEY` | `[HDX]` | None | Your API Secret key for hdx upload , should have write access and it is compulsory if ENABLE_HDX_EXPORTS is True | CONDITIONAL | +| `HDX_OWNER_ORG` | `HDX_OWNER_ORG` | `[HDX]` | None | Your HDX organization ID| CONDITIONAL | +| `HDX_MAINTAINER` | `HDX_MAINTAINER` | `[HDX]` | None | Your HDX Maintainer ID | CONDITIONAL | + ## Which Service uses which settings? @@ -102,6 +109,14 @@ The following are the different configuration options that are accepted. | `AWS_SECRET_ACCESS_KEY` | TBD | No | Yes | | `SENTRY_DSN` | TBD | Yes | No | | `SENTRY_RATE` | TBD | Yes | No | +| `ENABLE_HDX_EXPORTS` | `[HDX]` | Yes | Yes | +| `HDX_SITE` | `[HDX]` | Yes | Yes | +| `HDX_API_KEY` | `[HDX]` | Yes | Yes | +| `HDX_OWNER_ORG` | `[HDX]` | Yes | Yes | +| `HDX_MAINTAINER` | `[HDX]` | Yes | Yes | + + + ## Compulsory Configuration diff --git a/src/app.py b/src/app.py index d11a8609..1a119ecd 100644 --- a/src/app.py +++ b/src/app.py @@ -26,6 +26,7 @@ import sys import time import uuid +from collections import namedtuple from datetime import datetime from datetime import datetime as dt from datetime import timezone @@ -54,7 +55,7 @@ EXPORT_MAX_AREA_SQKM, ) from src.config import EXPORT_PATH as export_path -from src.config import HDX_MAINTAINER, HDX_OWNER_ORG +from src.config import HDX_MAINTAINER, HDX_OWNER_ORG, HDX_URL_PREFIX from src.config import INDEX_THRESHOLD as index_threshold from src.config import POLYGON_STATISTICS_API_URL from src.config import USE_CONNECTION_POOLING as use_connection_pooling @@ -939,7 +940,7 @@ def __init__(self, geojson=None, iso3=None): raise HTTPException(status_code=404, detail="Invalid iso3 code") self.INPUT_GEOM = result[0] else: - self.INPUT_GEOM = dumps(json_loads(geojson.json())) + self.INPUT_GEOM = dumps(geojson) @staticmethod def get_building_pattern_statement( @@ -1160,7 +1161,9 @@ def __init__(self, params): if not self.params.dataset.dataset_locations: self.params.dataset.dataset_locations = dataset_locations - self.uuid = str(uuid.uuid4()) + self.uuid = str(uuid.uuid4().hex) + self.parallel_process_state = False + self.default_export_path = os.path.join( export_path, self.uuid, @@ -1170,12 +1173,11 @@ def __init__(self, params): if os.path.exists(self.default_export_path): shutil.rmtree(self.default_export_path) os.makedirs(self.default_export_path) - self.duck_db_instance = DuckDB( - os.path.join( - self.default_export_path, - f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db", - ) + self.duck_db_db_path = os.path.join( + self.default_export_path, + f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db", ) + self.duck_db_instance = DuckDB(self.duck_db_db_path) def types_to_tables(self, type_list: list): mapping = { @@ -1203,22 +1205,26 @@ def format_where_clause(self, where_clause): else: return where_clause - def zip_to_s3(self, resources): - for resource 
in resources: - s3_upload_name = os.path.relpath( - resource["zip_path"], os.path.join(export_path, self.uuid) + def upload_to_s3(self, resource_path): + if not USE_S3_TO_UPLOAD: + raise HTTPException( + status_code=404, detail="S3 Export service is disabled on server" ) + s3_upload_name = os.path.relpath( + resource_path, os.path.join(export_path, self.uuid) + ) + file_transfer_obj = S3FileTransfer() + download_url = file_transfer_obj.upload( + resource_path, + str(s3_upload_name), + ) + return download_url - if not USE_S3_TO_UPLOAD: - raise HTTPException( - status_code=404, detail="S3 Export service is disabled on server" - ) - file_transfer_obj = S3FileTransfer() - download_url = file_transfer_obj.upload( - resource["zip_path"], - str(s3_upload_name), + def zip_to_s3(self, resources): + for resource in resources: + resource["download_url"] = self.upload_to_s3( + resource_path=resource["zip_path"] ) - resource["download_url"] = download_url os.remove(resource["zip_path"]) return resources @@ -1243,12 +1249,13 @@ def file_to_zip(self, working_dir, zip_path): return zip_path def query_to_file(self, query, category_name, feature_type, export_formats): - category_name = category_name.lower().replace(" ", "_") + category_name = slugify(category_name.lower()).replace("-", "_") file_export_path = os.path.join( self.default_export_path, category_name, feature_type ) resources = [] - for export_format in export_formats: + + def process_export_format(export_format): export_format_path = os.path.join(file_export_path, export_format.suffix) os.makedirs(export_format_path, exist_ok=True) @@ -1277,11 +1284,41 @@ def query_to_file(self, query, category_name, feature_type, export_formats): resource["format_suffix"] = export_format.suffix resource["format_description"] = export_format.driver_name + return resource + + if self.parallel_process_state is False and len(export_formats) > 1: + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count() + ) as executor: + futures = [ + executor.submit(process_export_format, export_format) + for export_format in export_formats + ] + resources = [ + future.result() + for future in concurrent.futures.as_completed(futures) + ] + else: + resource = process_export_format(export_formats[0]) resources.append(resource) + return resources + def process_category_result(self, category_result): + if self.params.hdx_upload: + return self.resource_to_hdx( + uploaded_resources=category_result.uploaded_resources, + dataset_config=self.params.dataset, + category=category_result.category, + ) + + return self.resource_to_response( + category_result.uploaded_resources, category_result.category + ) + def process_category(self, category): category_name, category_data = list(category.items())[0] + all_uploaded_resources = [] for feature_type in category_data.types: extract_query = extract_features_duckdb( self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, @@ -1293,7 +1330,26 @@ def process_category(self, category): extract_query, category_name, feature_type, category_data.formats ) uploaded_resources = self.zip_to_s3(resources) - return uploaded_resources + all_uploaded_resources.extend(uploaded_resources) + return all_uploaded_resources + + def resource_to_response(self, uploaded_resources, category): + category_name, category_data = list(category.items())[0] + + dataset_info = {} + resources = [] + for resource in uploaded_resources: + resource_meta = { + "name": resource["filename"], + "format": resource["format_suffix"], + "description": 
resource["format_description"], + "url": resource["download_url"], + "last_modifed": datetime.now().isoformat(), + } + resource_meta["uploaded_to_hdx"]: False + resources.append(resource_meta) + dataset_info["resources"] = resources + return {category_name: dataset_info} def resource_to_hdx(self, uploaded_resources, dataset_config, category): if any( @@ -1304,21 +1360,35 @@ def resource_to_hdx(self, uploaded_resources, dataset_config, category): hdx=dataset_config, category=category, default_category_path=self.default_export_path, + uuid=self.uuid, completeness_metadata={ "iso3": self.iso3, "geometry": self.params.geometry, }, ) uploader.init_dataset() + non_hdx_resources = [] for resource in uploaded_resources: + resource_meta = { + "name": resource["filename"], + "format": resource["format_suffix"], + "description": resource["format_description"], + "url": resource["download_url"], + "last_modifed": datetime.now().isoformat(), + } if resource["format_suffix"] in self.HDX_SUPPORTED_FORMATS: - uploader.add_resource( - resource["filename"], - resource["format_suffix"], - resource["format_description"], - resource["download_url"], - ) - uploader.upload_dataset() + uploader.add_resource(resource_meta) + else: + resource_meta["uploaded_to_hdx"]: False + non_hdx_resources.append(resource_meta) + category_name, hdx_dataset_info = uploader.upload_dataset(self.params.meta) + hdx_dataset_info["resources"].extend(non_hdx_resources) + return {category_name: hdx_dataset_info} + + def clean_resources(self): + temp_dir = os.path.join(export_path, self.uuid) + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) def process_hdx_tags(self): table_type = [ @@ -1327,47 +1397,92 @@ def process_hdx_tags(self): for cat_type in list(category.values())[0].types ] table_names = self.types_to_tables(list(set(table_type))) - + base_table_name = self.iso3 if self.iso3 else self.params.dataset.dataset_prefix for table in table_names: create_table = postgres2duckdb_query( - self.iso3 if self.iso3 else self.params.dataset.dataset_prefix, + base_table_name, table, self.cid, self.params.geometry, ) self.duck_db_instance.run_query(create_table.strip(), attach_pgsql=True) + CategoryResult = namedtuple( + "CategoryResult", ["category", "uploaded_resources"] + ) + + tag_process_results = [] + dataset_results = [] + if len(self.params.categories) > 1: + self.parallel_process_state = True + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count() * 2 + ) as executor: + futures = { + executor.submit(self.process_category, category): category + for category in self.params.categories + } + + for future in concurrent.futures.as_completed(futures): + category = futures[future] + uploaded_resources = future.result() + category_result = CategoryResult( + category=category, uploaded_resources=uploaded_resources + ) + tag_process_results.append(category_result) + else: + resources = self.process_category(self.params.categories[0]) + category_result = CategoryResult( + category=self.params.categories[0], uploaded_resources=resources + ) + tag_process_results.append(category_result) with concurrent.futures.ThreadPoolExecutor( max_workers=os.cpu_count() * 2 ) as executor: futures = { - executor.submit(self.process_category, category): category - for category in self.params.categories + executor.submit(self.process_category_result, result): result + for result in tag_process_results } for future in concurrent.futures.as_completed(futures): - category = futures[future] - try: - uploaded_resources = future.result() - 
self.resource_to_hdx( - uploaded_resources, self.params.dataset, category - ) + result = futures[future] + result_data = future.result() + dataset_results.append(result_data) - except Exception as e: - raise e - logging.error(f"An error occurred for category {category}: {e}") + result = {"datasets": dataset_results} + if self.params.meta: + db_dump_path = os.path.join( + self.default_export_path, + "DB_DUMP", + ) + os.makedirs(db_dump_path, exist_ok=True) + export_db = f"""EXPORT DATABASE '{db_dump_path}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 100000);""" + self.duck_db_instance.run_query(export_db, load_spatial=True) + db_zip_download_url = self.upload_to_s3( + self.file_to_zip( + working_dir=db_dump_path, + zip_path=os.path.join(self.default_export_path, "dbdump.zip"), + ) + ) + result["db_dump"] = db_zip_download_url + self.clean_resources() + return result class HDXUploader: def __init__( - self, category, hdx, default_category_path, completeness_metadata=None + self, category, hdx, uuid, default_category_path, completeness_metadata=None ): self.hdx = hdx self.category_name, self.category_data = list(category.items())[0] - self.category_path = os.path.join(default_category_path, self.category_name) + self.category_path = os.path.join( + default_category_path, slugify(self.category_name.lower()).replace("-", "_") + ) self.dataset = None + self.uuid = uuid self.completeness_metadata = completeness_metadata self.data_completeness_stats = None + self.resources = [] def slugify(self, name): return slugify(name).replace("-", "_") @@ -1385,36 +1500,48 @@ def add_notes(self): if self.completeness_metadata: self.data_completeness_stats = PolygonStats( iso3=self.completeness_metadata["iso3"], - geojson=self.completeness_metadata["geometry"], + geojson=self.completeness_metadata["geometry"].json() + if self.completeness_metadata["geometry"] + else None, ).get_summary_stats() if self.data_completeness_stats: self.category_data.hdx.notes += f'{self.data_completeness_stats["summary"][self.category_name.lower()]}\n' - self.category_data.hdx.notes += "Read about what this summary means, [indicators](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md) , [metrics](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md)" + self.category_data.hdx.notes += "Read about what this summary means, [indicators](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/indicators.md) , [metrics](https://github.com/hotosm/raw-data-api/tree/develop/docs/src/stats/metrics.md)\n" return self.category_data.hdx.notes + HDX_MARKDOWN.format( columns=columns, filter_str=filter_str ) - def add_resource( - self, resource_name, resource_format, resource_description, export_url - ): + def add_resource(self, resource_meta): if self.dataset: - resource = { - "name": resource_name, - "format": resource_format, - "description": resource_description, - "url": export_url, - "last_modified": datetime.now().isoformat(), - } - self.dataset.add_update_resource(resource) + self.resources.append(resource_meta) + self.dataset.add_update_resource(resource_meta) - def upload_dataset(self): + def upload_dataset(self, dump_config_to_s3=False): if self.dataset: + dataset_info = {} + dt_config_path = os.path.join( + self.category_path, f"{self.dataset['name']}.json" + ) self.dataset.save_to_json( os.path.join(self.category_path, f"{self.dataset['name']}.json") ) + if dump_config_to_s3: + s3_upload_name = os.path.relpath( + dt_config_path, os.path.join(export_path, 
self.uuid) + ) + file_transfer_obj = S3FileTransfer() + dataset_info["config"] = file_transfer_obj.upload( + dt_config_path, + str(s3_upload_name), + ) + self.dataset.set_reference_period(datetime.now()) self.dataset.create_in_hdx(allow_no_resources=True) + dataset_info["name"] = self.dataset["name"] + dataset_info["hdx_url"] = f"{HDX_URL_PREFIX}/dataset/{self.dataset['name']}" + dataset_info["resources"] = self.resources + return self.category_name, dataset_info def init_dataset(self): dataset_prefix = self.hdx.dataset_prefix diff --git a/src/config.py b/src/config.py index 6cdc6c4c..caf0229a 100644 --- a/src/config.py +++ b/src/config.py @@ -191,7 +191,7 @@ "HDX", "HDX_OWNER_ORG", fallback="225b9f7d-e7cb-4156-96a6-44c9c58d31e3" ) HDX_MAINTAINER = os.environ.get("HDX_MAINTAINER") or config.get( - "HDX", "HDX_MAINTAINER", fallback="6a0688ce-8521-46e2-8edd-8e26c0851ebd" + "HDX", "HDX_MAINTAINER", fallback=None ) from hdx.api.configuration import Configuration diff --git a/src/validation/models.py b/src/validation/models.py index b7ce0a19..b0185e23 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -29,6 +29,8 @@ from src.config import ( ALLOW_BIND_ZIP_FILTER, + ALLOWED_HDX_TAGS, + ALLOWED_HDX_UPDATE_FREQUENCIES, ENABLE_POLYGON_STATISTICS_ENDPOINTS, ENABLE_TILES, EXPORT_MAX_AREA_SQKM, @@ -251,6 +253,13 @@ class Config: class StatsRequestParams(BaseModel): + iso3: Optional[str] = Field( + default=None, + description="ISO3 Country Code.", + min_length=3, + max_length=3, + example="NPL", + ) geometry: Optional[Union[Polygon, MultiPolygon]] = Field( default=None, example={ @@ -266,12 +275,201 @@ class StatsRequestParams(BaseModel): ], }, ) + + @validator("geometry", pre=True, always=True) + def set_geometry_or_iso3(cls, value, values): + """Either geometry or iso3 should be supplied.""" + if value is not None and values.get("iso3") is not None: + raise ValueError("Only one of geometry or iso3 should be supplied.") + if value is None and values.get("iso3") is None: + raise ValueError("Either geometry or iso3 should be supplied.") + return value + + +### HDX BLock + + +class HDXModel(BaseModel): + tags: List[str] = Field( + ..., + description="List of tags for the HDX model.", + example=["roads", "transportation", "geodata"], + ) + caveats: str = Field( + default="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + description="Caveats/Warning for the Datasets.", + example="OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + ) + notes: str = Field( + default="", + description="Extra notes to append in notes section of hdx datasets", + example="Sample notes to append", + ) + + @validator("tags") + def validate_tags(cls, value): + for item in value: + if item.strip() not in ALLOWED_HDX_TAGS: + raise ValueError( + f"Invalid tag {item.strip()} , Should be within {ALLOWED_HDX_TAGS}" + ) + return value + + +class CategoryModel(BaseModel): + hdx: HDXModel + types: List[str] = Field( + ..., + description="List of feature types (points, lines, polygons).", + example=["lines"], + ) + select: List[str] = Field( + ..., + description="List of selected fields.", + example=["name", "highway"], + ) + where: str = Field( + ..., + description="SQL-like condition to filter features.", + example="highway IS NOT NULL", + ) + formats: List[str] = Field( + ..., + description="List of Export Formats (suffixes).", + example=["gpkg", "geojson"], + ) + + @validator("types") + def validate_types(cls, value): + allowed_types = {"points", "lines", 
"polygons"} + for item in value: + if item not in allowed_types: + raise ValueError( + f"Invalid type: {item}. Allowed types are {', '.join(allowed_types)}" + ) + return value + + @validator("formats") + def validate_export_types(cls, value): + for export_type in value: + if export_type not in EXPORT_TYPE_MAPPING: + raise ValueError(f"Unsupported export type: {export_type}") + return [EXPORT_TYPE_MAPPING[export_type] for export_type in value] + + +class ExportTypeInfo: + def __init__(self, suffix, driver_name, layer_creation_options, format_option): + self.suffix = suffix + self.driver_name = driver_name + self.layer_creation_options = layer_creation_options + self.format_option = format_option + + +EXPORT_TYPE_MAPPING = { + "geojson": ExportTypeInfo("geojson", "GeoJSON", [], "GDAL"), + "shp": ExportTypeInfo("shp", "ESRI Shapefile", [], "GDAL"), + "gpkg": ExportTypeInfo("gpkg", "GPKG", [], "GDAL"), + "sqlite": ExportTypeInfo("sqlite", "SQLite", [], "GDAL"), + "fgb": ExportTypeInfo("fgb", "FlatGeobuf", ["VERIFY_BUFFERS=NO"], "GDAL"), + "mvt": ExportTypeInfo("mvt", "MVT", [], "GDAL"), + "kml": ExportTypeInfo("kml", "KML", [], "GDAL"), + "gpx": ExportTypeInfo("gpx", "GPX", [], "GDAL"), + "parquet": ExportTypeInfo("parquet", "PARQUET", [], "PARQUET"), +} + + +class DatasetConfig(BaseModel): + private: bool = Field( + default=False, + description="Make dataset private , By default False , Public is recommended", + example="False", + ) + subnational: bool = Field( + default=False, + description="Make it true if dataset doesn't cover nation/country", + example="False", + ) + update_frequency: str = Field( + default="as needed", + description="Update frequncy to be added on uploads", + example="daily", + ) + dataset_title: str = Field( + default=None, + description="Dataset title which appears at top of the page", + example="Nepal", + ) + dataset_prefix: str = Field( + default=None, + description="Dataset prefix to be appended before category name, Will be ignored if iso3 is supplied", + example="hotosm_npl", + ) + dataset_locations: List[str] = Field( + default=None, + description="Valid dataset locations iso3", + example="['npl']", + ) + + @validator("update_frequency") + def validate_frequency(cls, value): + if value.strip() not in ALLOWED_HDX_UPDATE_FREQUENCIES: + raise ValueError( + f"Invalid update frequency , Should be within {ALLOWED_HDX_UPDATE_FREQUENCIES}" + ) + return value.strip() + + +class DynamicCategoriesModel(BaseModel): iso3: Optional[str] = Field( default=None, - description="ISO3 Country Code.", + description="ISO3 Country Code", min_length=3, max_length=3, - example="NPL", + example="USA", + ) + dataset: Optional[DatasetConfig] = Field( + default=None, description="Dataset Configurations for HDX Upload" + ) + meta: bool = Field( + default=False, + description="Dumps Meta db in parquet format & hdx config json to s3", + ) + hdx_upload: bool = Field( + default=True, description="Enable/Disable uploading dataset to hdx" + ) + + categories: List[Dict[str, CategoryModel]] = Field( + ..., + description="List of dynamic categories.", + example=[ + { + "Roads": { + "hdx": { + "tags": ["roads", "transportation", "geodata"], + "caveats": "OpenStreetMap data is crowd sourced and cannot be considered to be exhaustive", + }, + "types": ["lines", "polygons"], + "select": ["name", "highway"], + "where": "highway IS NOT NULL", + "formats": ["geojson"], + } + } + ], + ) + geometry: Optional[Union[Polygon, MultiPolygon]] = Field( + default=None, + example={ + "type": "Polygon", + "coordinates": 
[ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + }, ) @validator("geometry", pre=True, always=True) @@ -281,15 +479,12 @@ def set_geometry_or_iso3(cls, value, values): raise ValueError("Only one of geometry or iso3 should be supplied.") if value is None and values.get("iso3") is None: raise ValueError("Either geometry or iso3 should be supplied.") - return value - - @validator("geometry", pre=True, always=True) - def validate_geometry(cls, value): - """Converts geometry to geojson feature.""" if value is not None: - feature = { - "type": "Feature", - "geometry": json.loads(value.json()), - "properties": {}, - } - return feature + dataset = values.get("dataset").dict() + if dataset is None: + raise ValueError("Dataset config should be supplied for custom polygon") + + for item in dataset.keys(): + if dataset.get(item) is None: + raise ValueError(f"Missing, Dataset config : {item}") + return value From 2376428c7a77bb0c7037bd48a138c8229db6bbbd Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:06:41 +0545 Subject: [PATCH 09/20] upgrade python to 3.10 for workflows --- .github/workflows/Unit-Test.yml | 4 ++-- .github/workflows/build.yml | 4 ++-- .github/workflows/code-check.yml | 6 +++--- .github/workflows/publish_mkdocs.yml | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/Unit-Test.yml b/.github/workflows/Unit-Test.yml index e021283e..123e1960 100644 --- a/.github/workflows/Unit-Test.yml +++ b/.github/workflows/Unit-Test.yml @@ -31,10 +31,10 @@ jobs: options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 2 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.10 - name: Insert sample db data run: | diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b689ac9d..fe9dc0f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,10 +33,10 @@ jobs: options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 2 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.10 - name: Install necessary dependecies for rawdata loading run: sudo apt-get update && sudo apt-get install osm2pgsql - name: check version diff --git a/.github/workflows/code-check.yml b/.github/workflows/code-check.yml index 35a56d58..1b19d083 100644 --- a/.github/workflows/code-check.yml +++ b/.github/workflows/code-check.yml @@ -2,7 +2,7 @@ name: Code Check - Linting using flake8 on: push: paths-ignore: - - 'infra/**' + - "infra/**" branches: - master - develop @@ -16,10 +16,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.10 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/publish_mkdocs.yml b/.github/workflows/publish_mkdocs.yml index 5f511b5a..ed95c3b6 100644 --- a/.github/workflows/publish_mkdocs.yml +++ b/.github/workflows/publish_mkdocs.yml @@ -6,8 +6,8 @@ on: - master paths: # Only rebuild documentation when docs have changed - - 
'docs/**' - - '.github/workflows/publish_mkdocs.yml' + - "docs/**" + - ".github/workflows/publish_mkdocs.yml" permissions: contents: write jobs: @@ -15,10 +15,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.10 publish_branch: gh-pages - name: Install Dependencies run: | From 91b06b5b77c0dcd2d12cda3e8175d39080a2d4db Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:08:52 +0545 Subject: [PATCH 10/20] Replace python version to specific version of 3.10 --- .github/workflows/Unit-Test.yml | 2 +- .github/workflows/build.yml | 2 +- .github/workflows/code-check.yml | 2 +- .github/workflows/publish_mkdocs.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/Unit-Test.yml b/.github/workflows/Unit-Test.yml index 123e1960..7ba8a5a9 100644 --- a/.github/workflows/Unit-Test.yml +++ b/.github/workflows/Unit-Test.yml @@ -34,7 +34,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.10 + python-version: 3.10.13 - name: Insert sample db data run: | diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fe9dc0f1..2018770f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.10 + python-version: 3.10.13 - name: Install necessary dependecies for rawdata loading run: sudo apt-get update && sudo apt-get install osm2pgsql - name: check version diff --git a/.github/workflows/code-check.yml b/.github/workflows/code-check.yml index 1b19d083..c06208c4 100644 --- a/.github/workflows/code-check.yml +++ b/.github/workflows/code-check.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: 3.10.13 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/publish_mkdocs.yml b/.github/workflows/publish_mkdocs.yml index ed95c3b6..79fff6f8 100644 --- a/.github/workflows/publish_mkdocs.yml +++ b/.github/workflows/publish_mkdocs.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: 3.10.13 publish_branch: gh-pages - name: Install Dependencies run: | From fc6c4ad65af1fdb0a5dc05bc60b8fbdc3876dd98 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:12:41 +0545 Subject: [PATCH 11/20] Updated requirements , fixed typo error --- requirements.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 59c11f54..494e1cae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,12 +2,13 @@ fastapi==0.105.0 uvicorn==0.24.0 psycopg2==2.9.9 geojson-pydantic==1.0.1 -pytest == 7.4.3 -geojson == 3.1.0 +pytest==7.4.3 -# Used for new relic monitoring -newrelic == 7.2.4.171 -sentry-sdk == 1.5.12 +geojson==3.1.0 + +# # Used for new relic monitoring +# newrelic==7.2.4.171 +# sentry-sdk==1.5.12 ## Third party area==1.1.1 From 4129e75de64b1dc5fdf138a3479863578eaf93ce Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:17:12 +0545 Subject: [PATCH 12/20] Split requirement and fix typo in setup.py --- requirements.txt | 6 +++++- setup.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt 
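One caveat worth flagging on the workflow edits above: in YAML an unquoted python-version: 3.10 is read as the float 3.1, which is presumably why the value is either quoted or pinned to a full patch release such as 3.10.13 in the follow-up commit. A quick illustration (assumes PyYAML is installed, purely to show the parsing):

import yaml

print(yaml.safe_load("python-version: 3.10"))     # {'python-version': 3.1}, trailing zero is lost
print(yaml.safe_load("python-version: '3.10'"))   # {'python-version': '3.10'}, kept as a string
print(yaml.safe_load("python-version: 3.10.13"))  # {'python-version': '3.10.13'}, not a valid float, stays a string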
index 494e1cae..938d294b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,10 +2,14 @@ fastapi==0.105.0 uvicorn==0.24.0 psycopg2==2.9.9 geojson-pydantic==1.0.1 -pytest==7.4.3 + geojson==3.1.0 +# Testing +pytest==7.4.3 + + # # Used for new relic monitoring # newrelic==7.2.4.171 # sentry-sdk==1.5.12 diff --git a/setup.py b/setup.py index cb5655b0..76b48c2d 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,11 @@ description="The Raw Data API module makes it simple for you to get osm data stats provided by api in your own project", packages=setuptools.find_packages(), install_requires=[ - "pytest == 7.4.3", + "pytest==7.4.3", "psycopg2", "boto3==1.24.38", "fastapi==0.105.0", - "geojson == 7.4.3", + "geojson==3.1.0", "area==1.1.1", "orjson==3.9.10", "slowapi==0.1.8", From 55b67507fa7541b835baf76cd176defb0a7be5c9 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:22:10 +0545 Subject: [PATCH 13/20] Only import hdx config if it is enabled --- src/validation/models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/validation/models.py b/src/validation/models.py index b0185e23..e586af2f 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -29,13 +29,14 @@ from src.config import ( ALLOW_BIND_ZIP_FILTER, - ALLOWED_HDX_TAGS, - ALLOWED_HDX_UPDATE_FREQUENCIES, ENABLE_POLYGON_STATISTICS_ENDPOINTS, ENABLE_TILES, - EXPORT_MAX_AREA_SQKM, + ENABLE_HDX_EXPORTS ) +if ENABLE_HDX_EXPORTS: + from src.config import ALLOWED_HDX_TAGS, ALLOWED_HDX_UPDATE_FREQUENCIES, + def to_camel(string: str) -> str: split_string = string.split("_") From 55aacbea44bc9fb53853661039b7afa008bd20b7 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:23:37 +0545 Subject: [PATCH 14/20] Remove training comma in import --- src/validation/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/validation/models.py b/src/validation/models.py index e586af2f..febcc1ac 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -29,13 +29,13 @@ from src.config import ( ALLOW_BIND_ZIP_FILTER, + ENABLE_HDX_EXPORTS, ENABLE_POLYGON_STATISTICS_ENDPOINTS, ENABLE_TILES, - ENABLE_HDX_EXPORTS ) if ENABLE_HDX_EXPORTS: - from src.config import ALLOWED_HDX_TAGS, ALLOWED_HDX_UPDATE_FREQUENCIES, + from src.config import ALLOWED_HDX_TAGS, ALLOWED_HDX_UPDATE_FREQUENCIES def to_camel(string: str) -> str: From a72cee418ef0bc9049705b294ac0449b6b7a3431 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:32:09 +0545 Subject: [PATCH 15/20] Install missing lib for unit test --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 938d294b..5e67808a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ geojson==3.1.0 # Testing pytest==7.4.3 - +httpx==0.26.0 # # Used for new relic monitoring # newrelic==7.2.4.171 From b3f0004eaf538d3ce85426eb62cfcd1ef07da260 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:37:17 +0545 Subject: [PATCH 16/20] Only import sentry if config is supplied , Also adds documentation to builder --- API/main.py | 4 +++- src/query_builder/builder.py | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/API/main.py b/API/main.py index 87edae7c..b1323244 100644 --- a/API/main.py +++ b/API/main.py @@ -18,7 +18,6 @@ # import time -import sentry_sdk from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from 
fastapi.staticfiles import StaticFiles @@ -48,6 +47,9 @@ if ENABLE_POLYGON_STATISTICS_ENDPOINTS: from .stats import router as stats_router +if SENTRY_DSN: + import sentry_sdk + # only use sentry if it is specified in config blocks if SENTRY_DSN: sentry_sdk.init( diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index db0c18b9..448cfe2e 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -829,6 +829,15 @@ def generate_polygon_stats_graphql_query(geojson_feature): def get_country_from_iso(iso3): + """ + Generate a SQL query to retrieve country information based on ISO3 code. + + Args: + - iso3 (str): ISO3 Country Code. + + Returns: + str: SQL query to fetch country information. + """ query = f"""SELECT b.cid::int as fid, b.description as name, b.dataset_name as dataset_prefix, b.locations as locations FROM @@ -842,6 +851,19 @@ def get_country_from_iso(iso3): def postgres2duckdb_query( base_table_name, table, cid=None, geometry=None, enable_users_detail=False ): + """ + Generate a DuckDB query to create a table from a PostgreSQL query. + + Args: + - base_table_name (str): Base table name. + - table (str): PostgreSQL table name. + - cid (int, optional): Country ID for filtering. Defaults to None. + - geometry (Polygon, optional): Custom polygon geometry. Defaults to None. + - enable_users_detail (bool, optional): Enable user details. Defaults to False. + + Returns: + str: DuckDB query for creating a table. + """ select_query = ( """osm_id, version, changeset, timestamp, tags, ST_AsBinary(geom) as geometry""" ) @@ -863,6 +885,18 @@ def postgres2duckdb_query( def extract_features_duckdb(base_table_name, select, feature_type, where): + """ + Generate a DuckDB query to extract features based on given parameters. + + Args: + - base_table_name (str): Base table name. + - select (List[str]): List of selected fields. + - feature_type (str): Type of feature (points, lines, polygons). + - where (str): SQL-like condition to filter features. + + Returns: + str: DuckDB query to extract features. + """ map_tables = { "points": {"table": ["nodes"], "where": {"nodes": where}}, "lines": { @@ -894,6 +928,15 @@ def extract_features_duckdb(base_table_name, select, feature_type, where): def get_country_geom_from_iso(iso3): + """ + Generate a SQL query to retrieve country geometry based on ISO3 code. + + Args: + - iso3 (str): ISO3 Country Code. + + Returns: + str: SQL query to fetch country geometry. 
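Illustrative usage, an editor's sketch rather than part of the patch: because these builder helpers only return SQL strings, they can be inspected or unit-tested without a live database (assumes the package is importable on PYTHONPATH):

from src.query_builder.builder import get_country_from_iso, get_country_geom_from_iso

# Both helpers build plain SQL text; execution happens elsewhere (psycopg2 / DuckDB).
country_sql = get_country_from_iso("NPL")
geom_sql = get_country_geom_from_iso("NPL")
print(country_sql)  # fetches fid, name, dataset_prefix and locations for the ISO3 code
print(geom_sql)     # fetches the country geometry as GeoJSON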
+ """ query = f"""SELECT ST_AsGeoJSON(geometry) as geom FROM From 82bb633abf1aa37ac87578a94e682ff89dd51a54 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 20:41:56 +0545 Subject: [PATCH 17/20] Only import hdx related config if its in config enabled , added docs for ducdb class --- src/app.py | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 1a119ecd..b44caddd 100644 --- a/src/app.py +++ b/src/app.py @@ -55,7 +55,6 @@ EXPORT_MAX_AREA_SQKM, ) from src.config import EXPORT_PATH as export_path -from src.config import HDX_MAINTAINER, HDX_OWNER_ORG, HDX_URL_PREFIX from src.config import INDEX_THRESHOLD as index_threshold from src.config import POLYGON_STATISTICS_API_URL from src.config import USE_CONNECTION_POOLING as use_connection_pooling @@ -89,6 +88,9 @@ import duckdb from hdx.data.dataset import Dataset + from src.config import HDX_MAINTAINER, HDX_OWNER_ORG, HDX_URL_PREFIX + + global LOCAL_CON_POOL # getting the pool instance which was fireup when API is started @@ -1104,6 +1106,13 @@ def get_summary_stats(self): class DuckDB: + """ + Constructor for the DuckDB class. + + Parameters: + - db_path (str): The path to the DuckDB database file. + """ + def __init__(self, db_path): dbdict = get_db_connection_params() self.db_con_str = convert_dict_to_conn_str(db_dict=dbdict) @@ -1118,6 +1127,14 @@ def __init__(self, db_path): con.load_extension("json") def run_query(self, query, attach_pgsql=False, load_spatial=False): + """ + Executes a query on the DuckDB database. + + Parameters: + - query (str): The SQL query to execute. + - attach_pgsql (bool): Flag to indicate whether to attach a PostgreSQL database. + - load_spatial (bool): Flag to indicate whether to load the spatial extension. + """ with duckdb.connect(self.db_path) as con: if attach_pgsql: con.execute( @@ -1131,6 +1148,13 @@ def run_query(self, query, attach_pgsql=False, load_spatial=False): class HDX: + """ + Constructor for the HDX class. + + Parameters: + - params (DynamicCategoriesModel): An instance of DynamicCategoriesModel containing configuration settings. + """ + def __init__(self, params): self.params = params self.iso3 = self.params.iso3 @@ -1180,6 +1204,15 @@ def __init__(self, params): self.duck_db_instance = DuckDB(self.duck_db_db_path) def types_to_tables(self, type_list: list): + """ + Maps feature types to corresponding database tables. + + Parameters: + - type_list (List[str]): List of feature types. + + Returns: + - List of database tables associated with the given feature types. + """ mapping = { "points": ["nodes"], "lines": ["ways_line", "relations"], @@ -1195,6 +1228,15 @@ def types_to_tables(self, type_list: list): return list(table_set) def format_where_clause(self, where_clause): + """ + Formats the where_clause by replacing certain patterns. + + Parameters: + - where_clause (str): SQL-like condition to filter features. + + Returns: + - Formatted where_clause. 
+ """ pattern = r"tags\['([^']+)'\]" match = re.search(pattern, where_clause) From f33a0799b56f4d7e7e178eb28fc5c50c00224a2b Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 21:02:45 +0545 Subject: [PATCH 18/20] Added missing docstrings and also added default none to optional as pydantic is upgraded --- src/app.py | 132 +++++++++++++++++++++++++++++++++++++++ src/validation/models.py | 78 +++++++++++++++++++---- 2 files changed, 198 insertions(+), 12 deletions(-) diff --git a/src/app.py b/src/app.py index b44caddd..89d8f9aa 100644 --- a/src/app.py +++ b/src/app.py @@ -1248,6 +1248,15 @@ def format_where_clause(self, where_clause): return where_clause def upload_to_s3(self, resource_path): + """ + Uploads a resource file to Amazon S3. + + Parameters: + - resource_path (str): Path to the resource file on the local filesystem. + + Returns: + - Download URL for the uploaded resource. + """ if not USE_S3_TO_UPLOAD: raise HTTPException( status_code=404, detail="S3 Export service is disabled on server" @@ -1263,6 +1272,15 @@ def upload_to_s3(self, resource_path): return download_url def zip_to_s3(self, resources): + """ + Zips and uploads a list of resources to Amazon S3. + + Parameters: + - resources (List[Dict[str, Any]]): List of resource dictionaries. + + Returns: + - List of resource dictionaries with added download URLs. + """ for resource in resources: resource["download_url"] = self.upload_to_s3( resource_path=resource["zip_path"] @@ -1271,6 +1289,16 @@ def zip_to_s3(self, resources): return resources def file_to_zip(self, working_dir, zip_path): + """ + Creates a ZIP file from files in a directory. + + Parameters: + - working_dir (str): Path to the directory containing files to be zipped. + - zip_path (str): Path to the resulting ZIP file. + + Returns: + - Path to the created ZIP file. + """ zf = zipfile.ZipFile( zip_path, "w", @@ -1291,6 +1319,18 @@ def file_to_zip(self, working_dir, zip_path): return zip_path def query_to_file(self, query, category_name, feature_type, export_formats): + """ + Executes a query and exports the result to file(s). + + Parameters: + - query (str): SQL query to execute. + - category_name (str): Name of the category. + - feature_type (str): Feature type. + - export_formats (List[ExportTypeInfo]): List of export formats. + + Returns: + - List of resource dictionaries containing export information. + """ category_name = slugify(category_name.lower()).replace("-", "_") file_export_path = os.path.join( self.default_export_path, category_name, feature_type @@ -1347,6 +1387,15 @@ def process_export_format(export_format): return resources def process_category_result(self, category_result): + """ + Processes the result of a category and prepares the response. + + Parameters: + - category_result (CategoryResult): Instance of CategoryResult. + + Returns: + - Dictionary containing processed category result. + """ if self.params.hdx_upload: return self.resource_to_hdx( uploaded_resources=category_result.uploaded_resources, @@ -1359,6 +1408,15 @@ def process_category_result(self, category_result): ) def process_category(self, category): + """ + Processes a category by executing queries and handling exports. + + Parameters: + - category (Dict[str, CategoryModel]): Dictionary representing a category. + + Returns: + - List of resource dictionaries containing export information. 
+ """ category_name, category_data = list(category.items())[0] all_uploaded_resources = [] for feature_type in category_data.types: @@ -1376,6 +1434,16 @@ def process_category(self, category): return all_uploaded_resources def resource_to_response(self, uploaded_resources, category): + """ + Converts uploaded resources to a response format. + + Parameters: + - uploaded_resources (List[Dict[str, Any]]): List of resource dictionaries. + - category (Dict[str, CategoryModel]): Dictionary representing a category. + + Returns: + - Dictionary containing the response information. + """ category_name, category_data = list(category.items())[0] dataset_info = {} @@ -1394,6 +1462,17 @@ def resource_to_response(self, uploaded_resources, category): return {category_name: dataset_info} def resource_to_hdx(self, uploaded_resources, dataset_config, category): + """ + Converts uploaded resources to an HDX dataset and uploads to HDX. + + Parameters: + - uploaded_resources (List[Dict[str, Any]]): List of resource dictionaries. + - dataset_config (DatasetConfig): Instance of DatasetConfig. + - category (Dict[str, CategoryModel]): Dictionary representing a category. + + Returns: + - Dictionary containing the HDX upload information. + """ if any( item["format_suffix"] in self.HDX_SUPPORTED_FORMATS for item in uploaded_resources @@ -1428,11 +1507,20 @@ def resource_to_hdx(self, uploaded_resources, dataset_config, category): return {category_name: hdx_dataset_info} def clean_resources(self): + """ + Cleans up temporary resources. + """ temp_dir = os.path.join(export_path, self.uuid) if os.path.exists(temp_dir): shutil.rmtree(temp_dir) def process_hdx_tags(self): + """ + Processes HDX tags and executes category processing in parallel. + + Returns: + - Dictionary containing the processed dataset information. + """ table_type = [ cat_type for category in self.params.categories @@ -1512,6 +1600,17 @@ def process_hdx_tags(self): class HDXUploader: + """ + Constructor for the HDXUploader class. + + Parameters: + - category (Dict[str, CategoryModel]): Dictionary representing a category. + - hdx (HDX): Instance of the HDX class. + - uuid (str): Universally unique identifier. + - default_category_path (str): Default path for the category. + - completeness_metadata (Optional[Dict[str, Any]]): Metadata for completeness. + """ + def __init__( self, category, hdx, uuid, default_category_path, completeness_metadata=None ): @@ -1527,9 +1626,24 @@ def __init__( self.resources = [] def slugify(self, name): + """ + Converts a string to a valid slug format. + + Parameters: + - name (str): Input string. + + Returns: + - Slugified string. + """ return slugify(name).replace("-", "_") def add_notes(self): + """ + Adds notes based on category data. + + Returns: + - Notes string. + """ columns = [] for key in self.category_data.select: columns.append( @@ -1555,11 +1669,26 @@ def add_notes(self): ) def add_resource(self, resource_meta): + """ + Adds a resource to the list of resources. + + Parameters: + - resource_meta (Dict[str, Any]): Metadata for the resource. + """ if self.dataset: self.resources.append(resource_meta) self.dataset.add_update_resource(resource_meta) def upload_dataset(self, dump_config_to_s3=False): + """ + Uploads the dataset to HDX. + + Parameters: + - dump_config_to_s3 (bool): Flag to indicate whether to dump configuration to S3. + + Returns: + - Tuple containing category name and dataset information. 
+ """ if self.dataset: dataset_info = {} dt_config_path = os.path.join( @@ -1586,6 +1715,9 @@ def upload_dataset(self, dump_config_to_s3=False): return self.category_name, dataset_info def init_dataset(self): + """ + Initializes the HDX dataset. + """ dataset_prefix = self.hdx.dataset_prefix dataset_title = self.hdx.dataset_title dataset_locations = self.hdx.dataset_locations diff --git a/src/validation/models.py b/src/validation/models.py index febcc1ac..15895455 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -94,27 +94,27 @@ class JoinFilterType(Enum): class SQLFilter(BaseModel): - join_or: Optional[Dict[str, List[str]]] - join_and: Optional[Dict[str, List[str]]] + join_or: Optional[Dict[str, List[str]]] = Field(default=None) + join_and: Optional[Dict[str, List[str]]] = Field(default=None) class TagsFilter(BaseModel): - point: Optional[SQLFilter] - line: Optional[SQLFilter] - polygon: Optional[SQLFilter] - all_geometry: Optional[SQLFilter] + point: Optional[SQLFilter] = Field(default=None) + line: Optional[SQLFilter] = Field(default=None) + polygon: Optional[SQLFilter] = Field(default=None) + all_geometry: Optional[SQLFilter] = Field(default=None) class AttributeFilter(BaseModel): - point: Optional[List[str]] - line: Optional[List[str]] - polygon: Optional[List[str]] - all_geometry: Optional[List[str]] + point: Optional[List[str]] = Field(default=None) + line: Optional[List[str]] = Field(default=None) + polygon: Optional[List[str]] = Field(default=None) + all_geometry: Optional[List[str]] = Field(default=None) class Filters(BaseModel): - tags: Optional[TagsFilter] - attributes: Optional[AttributeFilter] + tags: Optional[TagsFilter] = Field(default=None) + attributes: Optional[AttributeFilter] = Field(default=None) class RawDataCurrentParamsBase(BaseModel): @@ -291,6 +291,15 @@ def set_geometry_or_iso3(cls, value, values): class HDXModel(BaseModel): + """ + Model for HDX configuration settings. + + Fields: + - tags (List[str]): List of tags for the HDX model. + - caveats (str): Caveats/Warning for the Datasets. + - notes (str): Extra notes to append in the notes section of HDX datasets. + """ + tags: List[str] = Field( ..., description="List of tags for the HDX model.", @@ -318,6 +327,17 @@ def validate_tags(cls, value): class CategoryModel(BaseModel): + """ + Model for category configuration settings. + + Fields: + - hdx (HDXModel): HDX configuration model. + - types (List[str]): List of feature types (points, lines, polygons). + - select (List[str]): List of selected fields. + - where (str): SQL-like condition to filter features. + - formats (List[str]): List of Export Formats (suffixes). + """ + hdx: HDXModel types: List[str] = Field( ..., @@ -359,6 +379,16 @@ def validate_export_types(cls, value): class ExportTypeInfo: + """ + Class representing export type information. + + Fields: + - suffix (str): File suffix for the export type. + - driver_name (str): GDAL driver name. + - layer_creation_options (List[str]): Layer creation options. + - format_option (str): Format option for GDAL. + """ + def __init__(self, suffix, driver_name, layer_creation_options, format_option): self.suffix = suffix self.driver_name = driver_name @@ -380,6 +410,18 @@ def __init__(self, suffix, driver_name, layer_creation_options, format_option): class DatasetConfig(BaseModel): + """ + Model for dataset configuration settings. + + Fields: + - private (bool): Make dataset private. By default False, public is recommended. 
+ - subnational (bool): Make it true if the dataset doesn't cover the nation/country. + - update_frequency (str): Update frequency to be added on uploads. + - dataset_title (str): Dataset title that appears at the top of the page. + - dataset_prefix (str): Dataset prefix to be appended before the category name. Ignored if iso3 is supplied. + - dataset_locations (List[str]): Valid dataset locations iso3. + """ + private: bool = Field( default=False, description="Make dataset private , By default False , Public is recommended", @@ -421,6 +463,18 @@ def validate_frequency(cls, value): class DynamicCategoriesModel(BaseModel): + """ + Model for dynamic categories. + + Fields: + - iso3 (Optional[str]): ISO3 Country Code. + - dataset (Optional[DatasetConfig]): Dataset Configurations for HDX Upload. + - meta (bool): Dumps Meta db in parquet format & HDX config JSON to S3. + - hdx_upload (bool): Enable/Disable uploading the dataset to HDX. + - categories (List[Dict[str, CategoryModel]]): List of dynamic categories. + - geometry (Optional[Union[Polygon, MultiPolygon]]): Custom polygon geometry. + """ + iso3: Optional[str] = Field( default=None, description="ISO3 Country Code", From eaacec5dd7e8dd76b2c46ca1496b8afa2c83f9b1 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 21:08:29 +0545 Subject: [PATCH 19/20] Adds missing docstring in models --- src/validation/models.py | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/validation/models.py b/src/validation/models.py index 15895455..6ec2c92d 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -318,6 +318,17 @@ class HDXModel(BaseModel): @validator("tags") def validate_tags(cls, value): + """Validates tags if they are allowed from hdx allowed approved tags + + Args: + value (_type_): _description_ + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ for item in value: if item.strip() not in ALLOWED_HDX_TAGS: raise ValueError( @@ -362,6 +373,17 @@ class CategoryModel(BaseModel): @validator("types") def validate_types(cls, value): + """validates geom types + + Args: + value (_type_): _description_ + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ allowed_types = {"points", "lines", "polygons"} for item in value: if item not in allowed_types: @@ -372,6 +394,17 @@ def validate_types(cls, value): @validator("formats") def validate_export_types(cls, value): + """Validates export types if they are supported + + Args: + value (_type_): _description_ + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ for export_type in value: if export_type not in EXPORT_TYPE_MAPPING: raise ValueError(f"Unsupported export type: {export_type}") @@ -455,6 +488,17 @@ class DatasetConfig(BaseModel): @validator("update_frequency") def validate_frequency(cls, value): + """Validates frequency + + Args: + value (_type_): _description_ + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ if value.strip() not in ALLOWED_HDX_UPDATE_FREQUENCIES: raise ValueError( f"Invalid update frequency , Should be within {ALLOWED_HDX_UPDATE_FREQUENCIES}" From cceba46cdf088f96e469b2a991e02e0c91e1fcc2 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 Dec 2023 21:32:17 +0545 Subject: [PATCH 20/20] Adds authentication on hdx endpoints and modifies tasks endpoint to get status of whats going on --- API/hdx.py | 4 +++- API/tasks.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 4 
deletions(-) diff --git a/API/hdx.py b/API/hdx.py index 4618fbe3..1421fe6a 100644 --- a/API/hdx.py +++ b/API/hdx.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, Body, Request +from fastapi import APIRouter, Body, Depends, Request from fastapi.responses import JSONResponse from fastapi_versioning import version @@ -7,6 +7,7 @@ from src.validation.models import DynamicCategoriesModel from .api_worker import process_hdx_request +from .auth import AuthUser, staff_required router = APIRouter(prefix="/hdx", tags=["HDX"]) @@ -16,6 +17,7 @@ @version(1) async def process_hdx_requests( request: Request, + user: AuthUser = Depends(staff_required), params: DynamicCategoriesModel = Body( ..., description="Input parameters including ISO3 country code and dynamic categories.", diff --git a/API/tasks.py b/API/tasks.py index 03a7c903..a93b0dc6 100644 --- a/API/tasks.py +++ b/API/tasks.py @@ -68,9 +68,16 @@ def inspect_workers(): inspected = celery.control.inspect() def extract_file_name(args: str) -> str: - """Extract file_name using a pattern match.""" - match = re.search(r"file_name\s*=\s*['\"]([^'\"]+)['\"]", args) - return match.group(1) if match else None + """Extract value prioritizing file_name, then iso3, and finally dataset_title.""" + keys = ["file_name", "iso3", "dataset_title"] + + for key in keys: + pattern = re.compile(rf"{key}\s*=\s*['\"]([^'\"]+)['\"]") + match = pattern.search(args) + if match: + return match.group(1) + + return None def filter_task_details(tasks: List[dict]) -> List[dict]: """Filter task details to include only id and file_name."""
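As a quick sanity check of the prioritised extraction above (an editor's example, not part of the patch), the first matching key in file_name, then iso3, then dataset_title order wins:

import re

def extract_file_name(args: str):
    # Mirrors the patched helper: try keys in priority order, return the first hit.
    for key in ("file_name", "iso3", "dataset_title"):
        match = re.compile(rf"{key}\s*=\s*['\"]([^'\"]+)['\"]").search(args)
        if match:
            return match.group(1)
    return None

assert extract_file_name("file_name='waterways', iso3='NPL'") == "waterways"
assert extract_file_name("iso3='NPL', dataset_title='Nepal'") == "NPL"
assert extract_file_name("dataset_title='Nepal'") == "Nepal"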