Adds task limit feature from API, enhances query to union the feature passed
kshitijrajsharma committed Jan 12, 2024
1 parent 011bcd4 commit 8b73004
Showing 4 changed files with 43 additions and 8 deletions.
22 changes: 19 additions & 3 deletions API/api_worker.py
@@ -16,7 +16,13 @@
 from src.config import ALLOW_BIND_ZIP_FILTER
 from src.config import CELERY_BROKER_URL as celery_broker_uri
 from src.config import CELERY_RESULT_BACKEND as celery_backend
-from src.config import ENABLE_TILES
+from src.config import (
+    DEFAULT_HARD_TASK_LIMIT,
+    DEFAULT_SOFT_TASK_LIMIT,
+    ENABLE_TILES,
+    HDX_HARD_TASK_LIMIT,
+    HDX_SOFT_TASK_LIMIT,
+)
 from src.config import USE_S3_TO_UPLOAD as use_s3_to_upload
 from src.config import logger as logging
 from src.query_builder.builder import format_file_name_str
@@ -37,7 +43,12 @@
 celery.conf.update(result_extended=True)
 
 
-@celery.task(bind=True, name="process_raw_data")
+@celery.task(
+    bind=True,
+    name="process_raw_data",
+    time_limit=DEFAULT_HARD_TASK_LIMIT,
+    soft_time_limit=DEFAULT_SOFT_TASK_LIMIT,
+)
 def process_raw_data(self, params):
     params = RawDataCurrentParams(**params)
     try:
@@ -195,7 +206,12 @@ def process_raw_data(self, params):
         raise ex
 
 
-@celery.task(bind=True, name="process_custom_request")
+@celery.task(
+    bind=True,
+    name="process_custom_request",
+    time_limit=HDX_HARD_TASK_LIMIT,
+    soft_time_limit=HDX_SOFT_TASK_LIMIT,
+)
 def process_custom_request(self, params):
     params = DynamicCategoriesModel(**params)
 
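For context, here is a minimal runnable sketch (not part of this commit; the demo app, broker URL, and sleep are hypothetical) of how the two limits used above behave: the soft limit raises `SoftTimeLimitExceeded` inside the task so it can clean up, while the hard limit terminates the worker process outright.

```python
import time

from celery import Celery
from celery.exceptions import SoftTimeLimitExceeded

# Hypothetical standalone app, only to illustrate soft vs. hard limits.
app = Celery("demo", broker="redis://localhost:6379/0")


@app.task(bind=True, soft_time_limit=5, time_limit=10)  # values in seconds
def long_running(self):
    try:
        time.sleep(60)  # stand-in for a long export job
    except SoftTimeLimitExceeded:
        # Raised at the soft limit, leaving a window to clean up before
        # the hard limit kills the worker process.
        return "timed out gracefully"
```

With the values in this commit, `process_raw_data` runs under the default pair (7200 s soft / 10800 s hard) and `process_custom_request` under the longer HDX pair (18000 s soft / 21600 s hard).
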
11 changes: 8 additions & 3 deletions docs/src/installation/configurations.md
@@ -59,6 +59,8 @@ The following are the different configuration options that are accepted.
 | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | `False` | Option to enable endpoints related to polygon statistics, such as approximate building counts and road length within the passed polygon | OPTIONAL |
 | `POLYGON_STATISTICS_API_URL` | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | `None` | API URL used to fetch polygon statistics metadata. Currently tested with the Kontour GraphQL query endpoint. Only required if ENABLE_POLYGON_STATISTICS_ENDPOINTS is enabled | OPTIONAL |
 | `POLYGON_STATISTICS_API_RATE_LIMIT` | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | `5` | Per-minute rate limit applied to the statistics endpoint. Defaults to 5 requests per minute | OPTIONAL |
+| `DEFAULT_SOFT_TASK_LIMIT` | `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | `7200` | Soft task time limit for Celery workers, in seconds. A soft time limit signal gently asks the task to finish up and terminate. Defaults to 2 hours | OPTIONAL |
+| `DEFAULT_HARD_TASK_LIMIT` | `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | `10800` | Hard task time limit for Celery workers, in seconds. The Celery task is killed immediately once this is exceeded. Defaults to 3 hours | OPTIONAL |
 | `CELERY_BROKER_URL` | `CELERY_BROKER_URL` | `[CELERY]` | `redis://localhost:6379/0` | Redis connection string for the broker | OPTIONAL |
 | `CELERY_RESULT_BACKEND` | `CELERY_RESULT_BACKEND` | `[CELERY]` | `redis://localhost:6379/0` | Redis/PostgreSQL connection string for the result backend, e.g. db+postgresql://username:password@localhost:5432/db_name | OPTIONAL |
 | `FILE_UPLOAD_METHOD` | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | `disk` | File upload method; Allowed values - disk, s3 | OPTIONAL |
@@ -74,6 +76,8 @@ The following are the different configuration options that are accepted.
 | `HDX_MAINTAINER` | `HDX_MAINTAINER` | `[HDX]` | None | Your HDX Maintainer ID | CONDITIONAL |
 | `DUCK_DB_MEMORY_LIMIT` | `DUCK_DB_MEMORY_LIMIT` | `[HDX]` | None | DuckDB max memory limit, typically 80% of your RAM, e.g. '5GB' | CONDITIONAL |
 | `DUCK_DB_THREAD_LIMIT` | `DUCK_DB_THREAD_LIMIT` | `[HDX]` | None | DuckDB max thread limit, typically the number of your cores, e.g. 2 | CONDITIONAL |
+| `HDX_SOFT_TASK_LIMIT` | `HDX_SOFT_TASK_LIMIT` | `[HDX]` | `18000` | Soft task time limit for Celery workers, in seconds. A soft time limit signal gently asks the task to finish up and terminate. Defaults to 5 hours | OPTIONAL |
+| `HDX_HARD_TASK_LIMIT` | `HDX_HARD_TASK_LIMIT` | `[HDX]` | `21600` | Hard task time limit for Celery workers, in seconds. The Celery task is killed immediately once this is exceeded. Defaults to 6 hours | OPTIONAL |
 
 
 ## Which Service uses which settings?
@@ -103,6 +107,8 @@ The following are the different configuration options that are accepted.
 | `ENABLE_POLYGON_STATISTICS_ENDPOINTS` | `[API_CONFIG]` | Yes | Yes |
 | `POLYGON_STATISTICS_API_URL` | `[API_CONFIG]` | Yes | Yes |
 | `POLYGON_STATISTICS_API_RATE_LIMIT` | `[API_CONFIG]` | Yes | No |
+| `DEFAULT_SOFT_TASK_LIMIT` | `[API_CONFIG]` | No | Yes |
+| `DEFAULT_HARD_TASK_LIMIT` | `[API_CONFIG]` | No | Yes |
 | `CELERY_BROKER_URL` | `[CELERY]` | Yes | Yes |
 | `CELERY_RESULT_BACKEND` | `[CELERY]` | Yes | Yes |
 | `FILE_UPLOAD_METHOD` | `[EXPORT_UPLOAD]` | Yes | Yes |
@@ -118,9 +124,8 @@ The following are the different configuration options that are accepted.
 | `HDX_MAINTAINER` | `[HDX]` | Yes | Yes |
 | `DUCK_DB_MEMORY_LIMIT` | `[HDX]` | Yes | Yes |
 | `DUCK_DB_THREAD_LIMIT` | `[HDX]` | Yes | Yes |
-
-
-
+| `HDX_SOFT_TASK_LIMIT` | `[HDX]` | No | Yes |
+| `HDX_HARD_TASK_LIMIT` | `[HDX]` | No | Yes |
 
 
 
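As an illustration, the new limits could be pinned in the config file with a fragment like the one below (the values shown are just the documented defaults; the same keys can instead be exported as environment variables, which take precedence):

```ini
[API_CONFIG]
DEFAULT_SOFT_TASK_LIMIT=7200
DEFAULT_HARD_TASK_LIMIT=10800

[HDX]
HDX_SOFT_TASK_LIMIT=18000
HDX_HARD_TASK_LIMIT=21600
```
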
14 changes: 14 additions & 0 deletions src/config.py
@@ -184,6 +184,20 @@ def not_raises(func, *args, **kwargs):
     "POLYGON_STATISTICS_API_RATE_LIMIT"
 ) or config.get("API_CONFIG", "POLYGON_STATISTICS_API_RATE_LIMIT", fallback=5)
 
+DEFAULT_SOFT_TASK_LIMIT = os.environ.get("DEFAULT_SOFT_TASK_LIMIT") or config.get(
+    "API_CONFIG", "DEFAULT_SOFT_TASK_LIMIT", fallback=2 * 60 * 60
+)
+DEFAULT_HARD_TASK_LIMIT = os.environ.get("DEFAULT_HARD_TASK_LIMIT") or config.get(
+    "API_CONFIG", "DEFAULT_HARD_TASK_LIMIT", fallback=3 * 60 * 60
+)
+
+HDX_SOFT_TASK_LIMIT = os.environ.get("HDX_SOFT_TASK_LIMIT") or config.get(
+    "HDX", "HDX_SOFT_TASK_LIMIT", fallback=5 * 60 * 60
+)
+HDX_HARD_TASK_LIMIT = os.environ.get("HDX_HARD_TASK_LIMIT") or config.get(
+    "HDX", "HDX_HARD_TASK_LIMIT", fallback=6 * 60 * 60
+)
+
 ENABLE_HDX_EXPORTS = os.environ.get("ENABLE_HDX_EXPORTS") or config.getboolean(
     "HDX", "ENABLE_HDX_EXPORTS", fallback=False
 )
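For reference, a standalone sketch of the precedence the block above relies on: an environment variable wins, then the config file, then the hard-coded fallback. The file name here is illustrative, and note that environment overrides arrive as strings while the fallbacks are ints, so cast before doing arithmetic on them.

```python
import os
from configparser import ConfigParser

config = ConfigParser()
config.read("config.txt")  # illustrative file name; a missing file is silently ignored

# Same pattern as the added settings: env var, else config file, else fallback.
DEFAULT_SOFT_TASK_LIMIT = os.environ.get("DEFAULT_SOFT_TASK_LIMIT") or config.get(
    "API_CONFIG", "DEFAULT_SOFT_TASK_LIMIT", fallback=2 * 60 * 60
)
print(int(DEFAULT_SOFT_TASK_LIMIT), "seconds")  # effective soft limit
```
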
4 changes: 2 additions & 2 deletions src/query_builder/builder.py
@@ -97,7 +97,7 @@ def get_query_as_geojson(query_list, ogr_export=None):
 def create_geom_filter(geom, geom_lookup_by="ST_intersects"):
     """generates geometry intersection filter - Rawdata extraction"""
     geometry_dump = dumps(loads(geom.model_dump_json()))
-    return f"""{geom_lookup_by}(geom,ST_GEOMFROMGEOJSON('{geometry_dump}'))"""
+    return f"""{geom_lookup_by}(geom,ST_Buffer(ST_Union(ST_makeValid(ST_GEOMFROMGEOJSON('{geometry_dump}'))),0.005))"""
 
 
 def format_file_name_str(input_str):
@@ -898,7 +898,7 @@ def convert_tags_pattern(query_string):
     row_filter_condition = (
         f"""(country <@ ARRAY [{cid}])"""
         if cid
-        else f"""(ST_within(geom,ST_makeValid(ST_GeomFromText('{wkt.dumps(loads(geometry.json()),decimals=6)}',4326))))"""
+        else f"""(ST_within(geom,ST_Buffer(ST_Union(ST_makeValid(ST_GeomFromText('{wkt.dumps(loads(geometry.json()),decimals=6)}',4326))),0.005)))"""
     )
     if single_category_where:
         row_filter_condition += f" and ({convert_tags_pattern(single_category_where)})"
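For context, a small standalone sketch of the new filter shape (the `build_geom_filter` helper and sample GeoJSON are hypothetical; the real `create_geom_filter` receives a Pydantic geometry model): the passed geometry is made valid, its parts are unioned into a single shape, and the result is buffered by 0.005 degrees (roughly 500 m near the equator), presumably so features sitting on the boundary are not dropped.

```python
def build_geom_filter(geojson_str: str, geom_lookup_by: str = "ST_intersects") -> str:
    """Mirrors the updated PostGIS filter: repair invalid rings, union the
    parts into one geometry, then buffer slightly before the spatial test."""
    return (
        f"{geom_lookup_by}(geom,"
        f"ST_Buffer(ST_Union(ST_makeValid(ST_GEOMFROMGEOJSON('{geojson_str}'))),0.005))"
    )


sample = '{"type":"Polygon","coordinates":[[[83.96,28.19],[84.0,28.19],[84.0,28.23],[83.96,28.23],[83.96,28.19]]]}'
print(build_geom_filter(sample))
```
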
