From dc77f0587e2a2debd7f1717e8c0eb7fe59cbfac4 Mon Sep 17 00:00:00 2001 From: David Schultz Date: Tue, 15 Oct 2024 11:53:27 -0500 Subject: [PATCH] add validation when submitting new dataset configs --- iceprod/core/config.py | 74 ++++++++++++++++++--------------- iceprod/rest/handlers/config.py | 11 +++++ tests/core/config_test.py | 2 +- 3 files changed, 53 insertions(+), 34 deletions(-) diff --git a/iceprod/core/config.py b/iceprod/core/config.py index 3ab011d9a..dbc418bc3 100644 --- a/iceprod/core/config.py +++ b/iceprod/core/config.py @@ -17,39 +17,7 @@ DATA_DEFAULTS = {key: value.get('default', None) for key,value in CONFIG_SCHEMA['$defs']['data']['items']['properties'].items()} -@dataclass -class Dataset: - """IceProd Dataset config and basic attributes""" - dataset_id: str - dataset_num: int - jobs_submitted: int - tasks_submitted: int - tasks_per_job: int - status: str - priority: float - group: str - user: str - debug: bool - config: dict - - @classmethod - async def load_from_api(cls, dataset_id: str, rest_client: RestClient) -> Self: - dataset = await rest_client.request('GET', f'/datasets/{dataset_id}') - config = await rest_client.request('GET', f'/config/{dataset_id}') - return cls( - dataset_id=dataset_id, - dataset_num=dataset['dataset'], - jobs_submitted=dataset['jobs_submitted'], - tasks_submitted=dataset['tasks_submitted'], - tasks_per_job=dataset['tasks_per_job'], - status=dataset['status'], - priority=dataset['priority'], - group=dataset['group'], - user=dataset['username'], - debug=dataset['debug'], - config=config, - ) - +class _ConfigMixin: def fill_defaults(self): def _load_ref(schema_value): if '$ref' in list(schema_value.keys()): @@ -92,6 +60,46 @@ def validate(self): jsonschema.validate(self.config, CONFIG_SCHEMA) +@dataclass +class Config(_ConfigMixin): + """IceProd Dataset config""" + config: dict + + +@dataclass +class Dataset(_ConfigMixin): + """IceProd Dataset config and basic attributes""" + dataset_id: str + dataset_num: int + jobs_submitted: int + tasks_submitted: int + tasks_per_job: int + status: str + priority: float + group: str + user: str + debug: bool + config: dict + + @classmethod + async def load_from_api(cls, dataset_id: str, rest_client: RestClient) -> Self: + dataset = await rest_client.request('GET', f'/datasets/{dataset_id}') + config = await rest_client.request('GET', f'/config/{dataset_id}') + return cls( + dataset_id=dataset_id, + dataset_num=dataset['dataset'], + jobs_submitted=dataset['jobs_submitted'], + tasks_submitted=dataset['tasks_submitted'], + tasks_per_job=dataset['tasks_per_job'], + status=dataset['status'], + priority=dataset['priority'], + group=dataset['group'], + user=dataset['username'], + debug=dataset['debug'], + config=config, + ) + + @dataclass class Job: """IceProd Job instance""" diff --git a/iceprod/rest/handlers/config.py b/iceprod/rest/handlers/config.py index 8426277b4..b6f528f12 100644 --- a/iceprod/rest/handlers/config.py +++ b/iceprod/rest/handlers/config.py @@ -1,10 +1,12 @@ import logging import json +from jsonschema.exceptions import ValidationError import tornado.web from ..base_handler import APIBase from ..auth import authorization, attr_auth +from iceprod.core.config import Config logger = logging.getLogger('rest.config') @@ -76,5 +78,14 @@ async def put(self, dataset_id): data['dataset_id'] = dataset_id elif data['dataset_id'] != dataset_id: raise tornado.web.HTTPError(400, reason='dataset_id mismatch') + try: + c = Config(data) + c.fill_defaults() + c.validate() + except ValidationError as e: + raise tornado.web.HTTPError(400, reason=str(e)) + except Exception: + logger.warning('unknown config validation error', exc_info=True) + raise tornado.web.HTTPError(400, reason='unknown validation error') await self.db.config.replace_one({'dataset_id':dataset_id}, data, upsert=True) self.write({}) diff --git a/tests/core/config_test.py b/tests/core/config_test.py index ff4d6bebe..c0d6c397d 100644 --- a/tests/core/config_test.py +++ b/tests/core/config_test.py @@ -10,7 +10,7 @@ def test_dataset_dataclasses(): with pytest.raises(Exception): Dataset() - d = Dataset('did123', 123, 1, 2, 3, 'processing', 0.5, 'grp', 'usr', False, {}) + d = Dataset('did123', 123, 1, 2, 3, 'processing', 0.5, 'grp', 'usr', False, config={}) assert d.dataset_id == 'did123' assert d.dataset_num == 123 assert d.jobs_submitted == 1