From 8b73004023089e05028b9811626f798bda098795 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 12 Jan 2024 09:36:52 +0545 Subject: [PATCH] Adds task limit feature from API , Enhances query to union the feature passed --- API/api_worker.py | 22 +++++++++++++++++++--- docs/src/installation/configurations.md | 11 ++++++++--- src/config.py | 14 ++++++++++++++ src/query_builder/builder.py | 4 ++-- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index 95d55c6e..b2112d4c 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -16,7 +16,13 @@ from src.config import ALLOW_BIND_ZIP_FILTER from src.config import CELERY_BROKER_URL as celery_broker_uri from src.config import CELERY_RESULT_BACKEND as celery_backend -from src.config import ENABLE_TILES +from src.config import ( + DEFAULT_HARD_TASK_LIMIT, + DEFAULT_SOFT_TASK_LIMIT, + ENABLE_TILES, + HDX_HARD_TASK_LIMIT, + HDX_SOFT_TASK_LIMIT, +) from src.config import USE_S3_TO_UPLOAD as use_s3_to_upload from src.config import logger as logging from src.query_builder.builder import format_file_name_str @@ -37,7 +43,12 @@ celery.conf.update(result_extended=True) -@celery.task(bind=True, name="process_raw_data") +@celery.task( + bind=True, + name="process_raw_data", + time_limit=DEFAULT_HARD_TASK_LIMIT, + soft_time_limit=DEFAULT_SOFT_TASK_LIMIT, +) def process_raw_data(self, params): params = RawDataCurrentParams(**params) try: @@ -195,7 +206,12 @@ def process_raw_data(self, params): raise ex -@celery.task(bind=True, name="process_custom_request") +@celery.task( + bind=True, + name="process_custom_request", + time_limit=HDX_HARD_TASK_LIMIT, + soft_time_limit=HDX_SOFT_TASK_LIMIT, +) def process_custom_request(self, params): params = DynamicCategoriesModel(**params) diff --git a/docs/src/installation/configurations.md b/docs/src/installation/configurations.md index 29481820..82f550c8 100644 --- a/docs/src/installation/configurations.md +++ 
b/docs/src/installation/configurations.md @@ -59,6 +59,8 @@ The following are the different configuration options that are accepted. | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | `False` | Option to enable endpoints related the polygon statistics about the approx buildings,road length in passed polygon| OPTIONAL | | `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | `None` | API URL for the polygon statistics to fetch the metadata , Currently tested with graphql query endpoint of Kontour , Only required if it is enabled from ENABLE_POLYGON_STATISTICS_ENDPOINTS | OPTIONAL | | `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | `5` | Rate limit to be applied for statistics endpoint per minute, Defaults to 5 request is allowed per minute | OPTIONAL | +| `DEFAULT_SOFT_TASK_LIMIT` | `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | `7200` | Soft task time limit signal for celery workers in seconds. It will gently remind celery to finish up the task and terminate. Defaults to 2 hours | OPTIONAL | +| `DEFAULT_HARD_TASK_LIMIT` | `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | `10800` | Hard task time limit signal for celery workers in seconds. It will immediately kill the celery task. Defaults to 3 hours | OPTIONAL | | `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL | | `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis/psotgresql connection string for the the result backend, eg : db+postgresql://username:password@localhost:5432/db_name | OPTIONAL | | `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL | @@ -74,6 +76,8 @@ The following are the different configuration options that are accepted. 
| `HDX_MAINTAINER` | `HDX_MAINTAINER` | `[HDX]` | None | Your HDX Maintainer ID | CONDITIONAL | | `DUCK_DB_MEMORY_LIMIT` | `DUCK_DB_MEMORY_LIMIT` | `[HDX]` | None | Duck DB max memory limit , 80 % of your RAM eg : '5GB'| CONDITIONAL | | `DUCK_DB_THREAD_LIMIT` | `DUCK_DB_THREAD_LIMIT` | `[HDX]` | None | Duck DB max threads limit ,n of your cores eg : 2 | CONDITIONAL | +| `HDX_SOFT_TASK_LIMIT` | `HDX_SOFT_TASK_LIMIT` | `[HDX]` | `18000` | Soft task time limit signal for celery workers in seconds. It will gently remind celery to finish up the task and terminate. Defaults to 5 hours | OPTIONAL | +| `HDX_HARD_TASK_LIMIT` | `HDX_HARD_TASK_LIMIT` | `[HDX]` | `21600` | Hard task time limit signal for celery workers in seconds. It will immediately kill the celery task. Defaults to 6 hours | OPTIONAL | ## Which Service uses which settings? @@ -103,6 +107,8 @@ The following are the different configuration options that are accepted. | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | Yes | Yes | | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | Yes | Yes | | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | Yes | No | +| `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | No | Yes | +| `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | No | Yes | | `CELERY_BROKER_URL` | `[CELERY]` | Yes | Yes | | `CELERY_RESULT_BACKEND` | `[CELERY]` | Yes | Yes | | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | Yes | Yes | @@ -118,9 +124,8 @@ The following are the different configuration options that are accepted. 
| `HDX_MAINTAINER` | `[HDX]` | Yes | Yes | | `DUCK_DB_MEMORY_LIMIT` | `[HDX]` | Yes | Yes | | `DUCK_DB_THREAD_LIMIT` | `[HDX]` | Yes | Yes | - - - +| `HDX_SOFT_TASK_LIMIT` | `[HDX]` | No | Yes | +| `HDX_HARD_TASK_LIMIT` | `[HDX]` | No | Yes | diff --git a/src/config.py b/src/config.py index ac9cdbcc..90a80c10 100644 --- a/src/config.py +++ b/src/config.py @@ -184,6 +184,20 @@ def not_raises(func, *args, **kwargs): "POLYGON_STATISTICS_API_RATE_LIMIT" ) or config.get("API_CONFIG", "POLYGON_STATISTICS_API_RATE_LIMIT", fallback=5) +DEFAULT_SOFT_TASK_LIMIT = os.environ.get("DEFAULT_SOFT_TASK_LIMIT") or config.get( + "API_CONFIG", "DEFAULT_SOFT_TASK_LIMIT", fallback=2 * 60 * 60 +) +DEFAULT_HARD_TASK_LIMIT = os.environ.get("DEFAULT_HARD_TASK_LIMIT") or config.get( + "API_CONFIG", "DEFAULT_HARD_TASK_LIMIT", fallback=3 * 60 * 60 +) + +HDX_SOFT_TASK_LIMIT = os.environ.get("HDX_SOFT_TASK_LIMIT") or config.get( + "HDX", "HDX_SOFT_TASK_LIMIT", fallback=5 * 60 * 60 +) +HDX_HARD_TASK_LIMIT = os.environ.get("HDX_HARD_TASK_LIMIT") or config.get( + "HDX", "HDX_HARD_TASK_LIMIT", fallback=6 * 60 * 60 +) + ENABLE_HDX_EXPORTS = os.environ.get("ENABLE_HDX_EXPORTS") or config.getboolean( "HDX", "ENABLE_HDX_EXPORTS", fallback=False ) diff --git a/src/query_builder/builder.py b/src/query_builder/builder.py index 4d3673c7..5b573d9a 100644 --- a/src/query_builder/builder.py +++ b/src/query_builder/builder.py @@ -97,7 +97,7 @@ def get_query_as_geojson(query_list, ogr_export=None): def create_geom_filter(geom, geom_lookup_by="ST_intersects"): """generates geometry intersection filter - Rawdata extraction""" geometry_dump = dumps(loads(geom.model_dump_json())) - return f"""{geom_lookup_by}(geom,ST_GEOMFROMGEOJSON('{geometry_dump}'))""" + return f"""{geom_lookup_by}(geom,ST_Buffer(ST_Union(ST_makeValid(ST_GEOMFROMGEOJSON('{geometry_dump}'))),0.005))""" def format_file_name_str(input_str): @@ -898,7 +898,7 @@ def convert_tags_pattern(query_string): row_filter_condition = ( f"""(country <@ ARRAY 
[{cid}])""" if cid - else f"""(ST_within(geom,ST_makeValid(ST_GeomFromText('{wkt.dumps(loads(geometry.json()),decimals=6)}',4326))))""" + else f"""(ST_within(geom,ST_Buffer(ST_Union(ST_makeValid(ST_GeomFromText('{wkt.dumps(loads(geometry.json()),decimals=6)}',4326))),0.005)))""" ) if single_category_where: row_filter_condition += f" and ({convert_tags_pattern(single_category_where)})"