Skip to content

Commit

Permalink
write a config to file, with tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dsschult committed Sep 13, 2023
1 parent aa51cf2 commit 071d825
Show file tree
Hide file tree
Showing 9 changed files with 1,164 additions and 4,355 deletions.
56 changes: 45 additions & 11 deletions iceprod/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
import importlib.resources
import json
import logging
try:
from typing import Self
except ImportError:
Expand All @@ -19,28 +20,63 @@ class Dataset:
"""IceProd Dataset config and basic attributes"""
dataset_id: str
dataset_num: int
jobs_submitted: int
tasks_submitted: int
tasks_per_job: int
status: str
priority: float
group: str
user: str
debug: bool
config: dict

@classmethod
async def load_from_api(cls, dataset_id: str, rest_client: RestClient) -> Self:
dataset = await rest_client.request('GET', f'/datasets/{dataset_id}')
config = await rest_client.request('GET', f'/config/{dataset_id}')
return cls(dataset_id, dataset['dataset'], dataset['group'], dataset['username'], config)
return cls(
dataset_id=dataset_id,
dataset_num=dataset['dataset'],
jobs_submitted=dataset['jobs_submitted'],
tasks_submitted=dataset['tasks_submitted'],
tasks_per_job=dataset['tasks_per_job'],
status=dataset['status'],
priority=dataset['priority'],
group=dataset['group'],
user=dataset['username'],
debug=dataset['debug'],
config=config,
)

def fill_defaults(self):
def _load_ref(schema_value):
if '$ref' in list(schema_value.keys()):
# load from ref
parts = schema_value['$ref'].split('/')[1:]
schema_value = CONFIG_SCHEMA
while parts:
schema_value = schema_value.get(parts.pop(0), {})
logging.debug('loading from ref: %r', schema_value)
return schema_value
def _fill_dict(user, schema):
for prop in schema['properties']:
v = schema['properties'][prop].get('default', None)
schema_value = _load_ref(schema['properties'][prop])
v = schema_value.get('default', None)
if prop not in user and v is not None:
user[prop] = v
for k in user:
schema_value = schema['properties'].get(k, {})
if isinstance(user[k], dict) and schema_value['type'] == 'object':
_fill_dict(user[k], schema_value)
elif isinstance(user[k], list) and schema_value['type'] == 'array':
_fill_list(user[k], schema_value)
schema_value = _load_ref(schema['properties'].get(k, {}))
logging.debug('filling defaults for %s: %r', k, schema_value)
try:
t = schema_value.get('type', 'str')
logging.debug('user[k] type == %r, schema_value[type] == %r', type(user[k]), t)
if isinstance(user[k], dict) and t == 'object':
_fill_dict(user[k], schema_value)
elif isinstance(user[k], list) and t == 'array':
_fill_list(user[k], schema_value)
except KeyError:
logging.warning('error processing key %r with schema %r', k, schema_value)
raise

def _fill_list(user, schema):
for item in user:
Expand Down Expand Up @@ -82,12 +118,10 @@ class Task:

@classmethod
async def load_from_api(cls, dataset_id: str, task_id: str, rest_client: RestClient) -> Self:
dataset, config, task = await asyncio.gather(
rest_client.request('GET', f'/datasets/{dataset_id}'),
rest_client.request('GET', f'/config/{dataset_id}'),
d, task = await asyncio.gather(
Dataset.load_from_api(dataset_id, rest_client),
rest_client.request('GET', f'/datasets/{dataset_id}/tasks/{task_id}')
)
d = Dataset(dataset_id, dataset['dataset'], dataset['group'], dataset['username'], config)
job = await rest_client.request('GET', f'/datasets/{dataset_id}/jobs/{task["job_id"]}')
j = Job(d, task['job_id'], job['job_index'], job['status'])
return cls(d, j, task['task_id'], task['task_index'], task['name'], task['depends'], task['requirements'], task['status'], task['site'], {})
Expand Down
133 changes: 68 additions & 65 deletions iceprod/core/data/dataset.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
"options": {
"description": "Internal parameters attached to a dataset",
"type": "object",
"default": {},
"properties": {},
"additionalProperties": true
},
"steering": {
"description": "General paramters, used as references elsewhere in the config",
"type": "object",
"default": {},
"properties": {
"parameters": { "$ref": "#/$defs/parameters" },
"batchsys": { "$ref": "#/$defs/batchsys" },
Expand All @@ -39,7 +41,10 @@
"title": "Task",
"description": "An individual batch job",
"properties": {
"name": { "type": "string" },
"name": {
"type": "string",
"description": "Unique name of task"
},
"depends": {
"type": "array",
"description": "Task dependency names",
Expand All @@ -51,7 +56,6 @@
"description": "Enable to use the task files API"
},
"data": { "$ref": "#/$defs/data" },
"classes": { "$ref": "#/$defs/classes" },
"parameters": { "$ref": "#/$defs/parameters" },
"batchsys": { "$ref": "#/$defs/batchsys" },
"requirements": { "$ref": "#/$defs/requirements" },
Expand All @@ -67,14 +71,17 @@
"title": "Tray",
"description": "Collection of modules",
"properties": {
"name": { "type": "string" },
"name": {
"type": "string",
"default": "",
"description": "Name of tray"
},
"iterations": {
"type": "integer",
"default": 1,
"description": "Number of times to execute this tray"
},
"data": { "$ref": "#/$defs/data", "deprecated": true },
"classes": { "$ref": "#/$defs/classes", "deprecated": true },
"parameters": { "$ref": "#/$defs/parameters" },
"modules": {
"type": "array",
Expand All @@ -83,15 +90,21 @@
"title": "Module",
"description": "The actual thing to execute, usually a script",
"properties": {
"name": {
"type": "string",
"default": "",
"description": "Name of module"
},
"data": { "$ref": "#/$defs/data", "deprecated": true },
"classes": { "$ref": "#/$defs/classes", "deprecated": true },
"parameters": { "$ref": "#/$defs/parameters" },
"running_class": {
"type": "string",
"default": "",
"description": "a Python class or function to call directly"
},
"src": {
"type": "string",
"default": "",
"description": "the location of a class or script"
},
"args": {
Expand All @@ -100,6 +113,7 @@
{ "type": "array", "items": { "type": "string" } },
{ "type": "object", "additionalProperties": { "type": "string" } }
],
"default": "",
"description": "args to give to a class or script"
},
"env_shell": {
Expand All @@ -114,7 +128,9 @@
},
"configs": {
"type": "object",
"default": {},
"description": "any json config files that should be written in $PWD (format is {filename: data})",
"properties": {},
"additionalProperties": { "type": "string" }
}
},
Expand All @@ -136,9 +152,18 @@
"required": [ "version", "tasks", "description" ],

"$defs": {
"parameters": {},
"parameters": {
"type": "object",
"description": "Config parameters",
"default": {},
"properties": {},
"additionalProperties": true
},
"batchsys": {
"description": "Overrides for batch system properties",
"type": "object",
"description": "Overrides for batch system properties: {batchsys: {propname: propvalue}}",
"default": {},
"properties": {},
"additionalProperties": {
"type": "object"
}
Expand All @@ -154,7 +179,6 @@
},
"gpu": {
"type": "integer",
"default": 0,
"description": "GPUs required"
},
"memory": {
Expand All @@ -167,76 +191,55 @@
"default": 1.0,
"description": "Disk required in GB"
},
"time": {
"type": "number",
"default": 1.0,
"description": "Time required in hours"
},
"os": {
"type": "string",
"default": "",
"description": "OS required in CVMFS format"
},
"site": {
"type": "string",
"default": "",
"description": "Site required"
}
}
},
"data": {
"type": "object",
"description": "A data file, to upload or download",
"properties": {
"remote": {
"type": "string",
"default": "",
"description": "remote url (can leave blank for temp files)"
},
"local": {
"type": "string",
"default": "",
"description": "local file name (will use basename of remote if available)"
},
"type": {
"enum": ["permanent", "job_temp", "dataset_temp", "site_temp"],
"default": "permanent",
"description": "type of data"
},
"movement": {
"enum": ["input", "output", "both"],
"default": "both",
"description": "movement of data"
"type": "array",
"default": [],
"items": {
"type": "object",
"description": "A data file, to upload or download",
"properties": {
"remote": {
"type": "string",
"default": "",
"description": "remote url (can leave blank for temp files)"
},
"local": {
"type": "string",
"default": "",
"description": "local file name (will use basename of remote if available)"
},
"type": {
"enum": ["permanent", "job_temp", "dataset_temp", "site_temp"],
"default": "permanent",
"description": "type of data"
},
"movement": {
"enum": ["input", "output", "both"],
"default": "input",
"description": "movement of data"
},
"transfer": {
"anyOf": [{"type": "string"}, {"type": "boolean"}],
"default": true,
"description": "should the data be transferred"
}
}
}
},
"class": {
"type": "object",
"description": "A class object, downloaded from a url",
"properties": {
"name": {
"type": "string",
"default": "",
"description": "name of class"
},
"src": {
"type": "string",
"default": "",
"description": "url"
},
"recursive": {
"type": "boolean",
"default": false,
"description": "recusively add to path",
"deprecated": true
},
"libs": {
"type": "string",
"default": "",
"description": "explicit library paths"
},
"env_vars": {
"type": "string",
"default": "",
"description": "any env vars to add"
}
},
"required": ["name", "src"]
}
}
}
8 changes: 2 additions & 6 deletions iceprod/core/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,11 @@
dataclass, to be used in javascript.
"""

from __future__ import absolute_import, division, print_function

import time

from numbers import Number, Integral
try:
String = basestring
except NameError:
String = str
String = str


# pluralizations for keys that are not classes here
_plurals = {
Expand Down
Loading

0 comments on commit 071d825

Please sign in to comment.