From 071d825ea252290a144be88de83e6892a21e5a63 Mon Sep 17 00:00:00 2001
From: David Schultz
Date: Tue, 5 Sep 2023 15:29:43 -0500
Subject: [PATCH] write a config to file, with tests

---
 iceprod/core/config.py                |   56 +-
 iceprod/core/data/dataset.schema.json |  133 +-
 iceprod/core/dataclasses.py           |    8 +-
 iceprod/core/exe.py                   | 1339 ++++-------
 iceprod/core/exe_helper.py            |  133 --
 iceprod/core/pilot.py                 |  583 -----
 iceprod/core/serialization.py         |   79 -
 tests/core/config_test.py             |  148 +-
 tests/core/exe_test.py                | 3040 +++++--------------------
 9 files changed, 1164 insertions(+), 4355 deletions(-)
 delete mode 100644 iceprod/core/exe_helper.py
 delete mode 100644 iceprod/core/pilot.py
 delete mode 100644 iceprod/core/serialization.py

diff --git a/iceprod/core/config.py b/iceprod/core/config.py
index 7d73ecaa..85414566 100644
--- a/iceprod/core/config.py
+++ b/iceprod/core/config.py
@@ -2,6 +2,7 @@
 from dataclasses import dataclass
 import importlib.resources
 import json
+import logging
 try:
     from typing import Self
 except ImportError:
@@ -19,28 +20,63 @@ class Dataset:
     """IceProd Dataset config and basic attributes"""
     dataset_id: str
     dataset_num: int
+    jobs_submitted: int
+    tasks_submitted: int
+    tasks_per_job: int
+    status: str
+    priority: float
     group: str
     user: str
+    debug: bool
     config: dict

     @classmethod
     async def load_from_api(cls, dataset_id: str, rest_client: RestClient) -> Self:
         dataset = await rest_client.request('GET', f'/datasets/{dataset_id}')
         config = await rest_client.request('GET', f'/config/{dataset_id}')
-        return cls(dataset_id, dataset['dataset'], dataset['group'], dataset['username'], config)
+        return cls(
+            dataset_id=dataset_id,
+            dataset_num=dataset['dataset'],
+            jobs_submitted=dataset['jobs_submitted'],
+            tasks_submitted=dataset['tasks_submitted'],
+            tasks_per_job=dataset['tasks_per_job'],
+            status=dataset['status'],
+            priority=dataset['priority'],
+            group=dataset['group'],
+            user=dataset['username'],
+            debug=dataset['debug'],
+            config=config,
+        )

     def fill_defaults(self):
+        def _load_ref(schema_value):
+            if '$ref' in list(schema_value.keys()):
+                # load from ref
+                parts = schema_value['$ref'].split('/')[1:]
+                schema_value = CONFIG_SCHEMA
+                while parts:
+                    schema_value = schema_value.get(parts.pop(0), {})
+                logging.debug('loading from ref: %r', schema_value)
+            return schema_value
+
         def _fill_dict(user, schema):
             for prop in schema['properties']:
-                v = schema['properties'][prop].get('default', None)
+                schema_value = _load_ref(schema['properties'][prop])
+                v = schema_value.get('default', None)
                 if prop not in user and v is not None:
                     user[prop] = v
             for k in user:
-                schema_value = schema['properties'].get(k, {})
-                if isinstance(user[k], dict) and schema_value['type'] == 'object':
-                    _fill_dict(user[k], schema_value)
-                elif isinstance(user[k], list) and schema_value['type'] == 'array':
-                    _fill_list(user[k], schema_value)
+                schema_value = _load_ref(schema['properties'].get(k, {}))
+                logging.debug('filling defaults for %s: %r', k, schema_value)
+                try:
+                    t = schema_value.get('type', 'str')
+                    logging.debug('user[k] type == %r, schema_value[type] == %r', type(user[k]), t)
+                    if isinstance(user[k], dict) and t == 'object':
+                        _fill_dict(user[k], schema_value)
+                    elif isinstance(user[k], list) and t == 'array':
+                        _fill_list(user[k], schema_value)
+                except KeyError:
+                    logging.warning('error processing key %r with schema %r', k, schema_value)
+                    raise

         def _fill_list(user, schema):
             for item in user:
@@ -82,12 +118,10 @@ class Task:

     @classmethod
     async def load_from_api(cls, dataset_id: str, task_id: str, rest_client: RestClient) -> Self:
-        dataset, config, task = await asyncio.gather(
-            rest_client.request('GET', f'/datasets/{dataset_id}'),
-            rest_client.request('GET', f'/config/{dataset_id}'),
+        d, task = await asyncio.gather(
+            Dataset.load_from_api(dataset_id, rest_client),
             rest_client.request('GET', f'/datasets/{dataset_id}/tasks/{task_id}')
         )
-        d = Dataset(dataset_id, dataset['dataset'], dataset['group'], dataset['username'], config)
         job = await rest_client.request('GET', f'/datasets/{dataset_id}/jobs/{task["job_id"]}')
         j = Job(d, task['job_id'], job['job_index'], job['status'])
         return cls(d, j, task['task_id'], task['task_index'], task['name'], task['depends'], task['requirements'], task['status'], task['site'], {})
diff --git a/iceprod/core/data/dataset.schema.json b/iceprod/core/data/dataset.schema.json
index 9d7f663b..54766dd2 100644
--- a/iceprod/core/data/dataset.schema.json
+++ b/iceprod/core/data/dataset.schema.json
@@ -18,12 +18,14 @@
         "options": {
             "description": "Internal parameters attached to a dataset",
             "type": "object",
+            "default": {},
             "properties": {},
             "additionalProperties": true
         },
         "steering": {
             "description": "General paramters, used as references elsewhere in the config",
             "type": "object",
+            "default": {},
             "properties": {
                 "parameters": { "$ref": "#/$defs/parameters" },
                 "batchsys": { "$ref": "#/$defs/batchsys" },
@@ -39,7 +41,10 @@
                 "title": "Task",
                 "description": "An individual batch job",
                 "properties": {
-                    "name": { "type": "string" },
+                    "name": {
+                        "type": "string",
+                        "description": "Unique name of task"
+                    },
                     "depends": {
                         "type": "array",
                         "description": "Task dependency names",
@@ -51,7 +56,6 @@
                         "description": "Enable to use the task files API"
                     },
                     "data": { "$ref": "#/$defs/data" },
-                    "classes": { "$ref": "#/$defs/classes" },
                     "parameters": { "$ref": "#/$defs/parameters" },
                     "batchsys": { "$ref": "#/$defs/batchsys" },
                     "requirements": { "$ref": "#/$defs/requirements" },
@@ -67,14 +71,17 @@
                                 "title": "Tray",
                                 "description": "Collection of modules",
                                 "properties": {
-                                    "name": { "type": "string" },
+                                    "name": {
+                                        "type": "string",
+                                        "default": "",
+                                        "description": "Name of tray"
+                                    },
                                     "iterations": {
                                         "type": "integer",
                                         "default": 1,
                                         "description": "Number of times to execute this tray"
                                     },
                                     "data": { "$ref": "#/$defs/data", "deprecated": true },
-                                    "classes": { "$ref": "#/$defs/classes", "deprecated": true },
                                     "parameters": { "$ref": "#/$defs/parameters" },
                                     "modules": {
                                         "type": "array",
@@ -83,15 +90,21 @@
                                             "title": "Module",
                                             "description": "The actual thing to execute, usually a script",
                                             "properties": {
+                                                "name": {
+                                                    "type": "string",
+                                                    "default": "",
+                                                    "description": "Name of module"
+                                                },
                                                 "data": { "$ref": "#/$defs/data", "deprecated": true },
-                                                "classes": { "$ref": "#/$defs/classes", "deprecated": true },
                                                 "parameters": { "$ref": "#/$defs/parameters" },
                                                 "running_class": {
                                                     "type": "string",
+                                                    "default": "",
                                                     "description": "a Python class or function to call directly"
                                                 },
                                                 "src": {
                                                     "type": "string",
+                                                    "default": "",
                                                     "description": "the location of a class or script"
                                                 },
                                                 "args": {
                                                     "anyOf": [
                                                         { "type": "array", "items": { "type": "string" } },
                                                         { "type": "object", "additionalProperties": { "type": "string" } }
                                                     ],
+                                                    "default": "",
                                                     "description": "args to give to a class or script"
                                                 },
                                                 "env_shell": {
@@ -114,7 +128,9 @@
                                                 },
                                                 "configs": {
                                                     "type": "object",
+                                                    "default": {},
                                                     "description": "any json config files that should be written in $PWD (format is {filename: data})",
+                                                    "properties": {},
                                                     "additionalProperties": { "type": "string" }
                                                 }
                                             },
@@ -136,9 +152,18 @@
     "required": [
         "version",
         "tasks",
"description" ], "$defs": { - "parameters": {}, + "parameters": { + "type": "object", + "description": "Config parameters", + "default": {}, + "properties": {}, + "additionalProperties": true + }, "batchsys": { - "description": "Overrides for batch system properties", + "type": "object", + "description": "Overrides for batch system properties: {batchsys: {propname: propvalue}}", + "default": {}, + "properties": {}, "additionalProperties": { "type": "object" } @@ -154,7 +179,6 @@ }, "gpu": { "type": "integer", - "default": 0, "description": "GPUs required" }, "memory": { @@ -167,76 +191,55 @@ "default": 1.0, "description": "Disk required in GB" }, + "time": { + "type": "number", + "default": 1.0, + "description": "Time required in hours" + }, "os": { "type": "string", - "default": "", "description": "OS required in CVMFS format" }, "site": { "type": "string", - "default": "", "description": "Site required" } } }, "data": { - "type": "object", - "description": "A data file, to upload or download", - "properties": { - "remote": { - "type": "string", - "default": "", - "description": "remote url (can leave blank for temp files)" - }, - "local": { - "type": "string", - "default": "", - "description": "local file name (will use basename of remote if available)" - }, - "type": { - "enum": ["permanent", "job_temp", "dataset_temp", "site_temp"], - "default": "permanent", - "description": "type of data" - }, - "movement": { - "enum": ["input", "output", "both"], - "default": "both", - "description": "movement of data" + "type": "array", + "default": [], + "items": { + "type": "object", + "description": "A data file, to upload or download", + "properties": { + "remote": { + "type": "string", + "default": "", + "description": "remote url (can leave blank for temp files)" + }, + "local": { + "type": "string", + "default": "", + "description": "local file name (will use basename of remote if available)" + }, + "type": { + "enum": ["permanent", "job_temp", "dataset_temp", "site_temp"], + "default": "permanent", + "description": "type of data" + }, + "movement": { + "enum": ["input", "output", "both"], + "default": "input", + "description": "movement of data" + }, + "transfer": { + "anyOf": [{"type": "string"}, {"type": "boolean"}], + "default": true, + "description": "should the data be transferred" + } } } - }, - "class": { - "type": "object", - "description": "A class object, downloaded from a url", - "properties": { - "name": { - "type": "string", - "default": "", - "description": "name of class" - }, - "src": { - "type": "string", - "default": "", - "description": "url" - }, - "recursive": { - "type": "boolean", - "default": false, - "description": "recusively add to path", - "deprecated": true - }, - "libs": { - "type": "string", - "default": "", - "description": "explicit library paths" - }, - "env_vars": { - "type": "string", - "default": "", - "description": "any env vars to add" - } - }, - "required": ["name", "src"] } } } \ No newline at end of file diff --git a/iceprod/core/dataclasses.py b/iceprod/core/dataclasses.py index b2b9ae45..5f264db2 100644 --- a/iceprod/core/dataclasses.py +++ b/iceprod/core/dataclasses.py @@ -15,15 +15,11 @@ dataclass, to be used in javascript. 
""" -from __future__ import absolute_import, division, print_function - import time from numbers import Number, Integral -try: - String = basestring -except NameError: - String = str +String = str + # pluralizations for keys that are not classes here _plurals = { diff --git a/iceprod/core/exe.py b/iceprod/core/exe.py index 710a056a..9bff36e7 100644 --- a/iceprod/core/exe.py +++ b/iceprod/core/exe.py @@ -1,6 +1,5 @@ """ The core execution functions for running on a node. -These are all called from :any:`iceprod.core.i3exec`. The fundamental design of the core is to run a task composed of trays and modules. The general heirarchy looks like:: @@ -22,1051 +21,491 @@ Parameters can be defined at every level, and each level is treated as a scope (such that inner scopes inherit from outer scopes). This is accomplished via an internal evironment for each scope. -""" -from __future__ import absolute_import, division, print_function +Data movement should be defined at the task level. +""" -import sys -import os -import stat -import time -import glob +from contextlib import contextmanager import copy -from datetime import datetime -import asyncio - -try: - import cPickle as pickle -except Exception: - import pickle - +from dataclasses import dataclass import logging +import os +from pathlib import Path +from typing import Any, Iterator, Optional -from iceprod.core import constants -from iceprod.core import dataclasses +from iceprod.core import config +from iceprod.core.defaults import add_default_options from iceprod.core import util from iceprod.core import functions -from iceprod.core.resources import Resources import iceprod.core.parser from iceprod.core.jsonUtil import json_encode,json_decode -class Config: - """Contain the configuration and related methods""" - def __init__(self, config=None, parser=None, rpc=None, logger=None): - self.config = config if config else dataclasses.Job() - self.parser = parser if parser else iceprod.core.parser.ExpParser() - self.rpc = rpc - self.logger = logger if logger else logging +class ConfigError(Exception): + pass + + +class ConfigParser: + """ + Parse things using a config and the tray/task/module environment. - def parseValue(self, value, env={}): + Note: dataset config must be valid! + + Args: + dataset: a dataset object with config + logger: a logger object, for localized logging + """ + def __init__(self, dataset: config.Dataset, logger: Optional[logging.Logger] = None): + dataset.validate() + self.config = dataset.config + self.logger = logger if logger else logging.getLogger() + self.parser = iceprod.core.parser.ExpParser() + + def parseValue(self, value: Any, env: dict = {}) -> Any: """ Parse a value from the available env and global config. - Uses the :class:`Meta Parser ` on any string value. - Pass-through for any other object. + If the value is a string: + 1. Use the :class:`Meta Parser ` to parse the string. + 2. Expand any env variables in the result. - :param value: The value to parse - :param env: The environment to use, optional - :returns: The parsed value + If the value is not a string, pass through the value. 
+
+        Args:
+            value: the value to parse
+            env: tray/task/module env
+
+        Returns:
+            the parsed value
         """
-        if isinstance(value,dataclasses.String):
-            self.logger.debug('parse before:%r| env=%r',value,env)
-            value = self.parser.parse(value,self.config,env)
-            if isinstance(value,dataclasses.String):
+        if isinstance(value, str):
+            self.logger.debug('parse before:%r| env=%r', value, env)
+            value = self.parser.parse(value, self.config, env)
+            if isinstance(value, str):
                 value = os.path.expandvars(value)
-            self.logger.debug('parse after:%r',value)
+            self.logger.debug('parse after:%r', value)
         return value
 
-    def parseObject(self,obj,env):
-        """Recursively parse a dict or list"""
-        if isinstance(obj,dataclasses.String):
-            return self.parseValue(obj,env)
-        elif isinstance(obj,(list,tuple)):
-            return [self.parseObject(v,env) for v in obj]
-        elif isinstance(obj,dict):
-            ret = copy.copy(obj)  # in case it's a subclass of dict, like dataclasses
+    def parseObject(self, obj: Any, env: dict) -> Any:
+        """
+        Recursively parse a dict or list.
+
+        Do not modify original object.
+
+        Args:
+            obj: object to parse
+            env: tray/task/module env
+
+        Returns:
+            the parsed object
+        """
+        if isinstance(obj, str):
+            return self.parseValue(obj, env)
+        elif isinstance(obj, (list, tuple)):
+            return [self.parseObject(v, env) for v in obj]
+        elif isinstance(obj, dict):
+            ret = copy.copy(obj)  # use copy.copy in case it's a subclass of dict
             for k in obj:
-                ret[k] = self.parseObject(obj[k],env)
+                ret[k] = self.parseObject(obj[k], env)
             return ret
         else:
             return obj
 
 
-class SetupEnv:
+Env = dict[str, Any]
+
+
+@contextmanager
+def scope_env(cfg: ConfigParser, obj: dict, upperenv: Optional[Env] = None, logger: Optional[logging.Logger] = None) -> Iterator[Env]:
     """
-    The internal environment (env) is a dictionary composed of several objects:
+    A context manager for parsing scoped config, such as parameters.
 
-    parameters
+    The returned environment is a dictionary composed of several objects:
+
+    * parameters
         Parameters are defined directly as an object, or as a string pointing
         to another object. They can use the IceProd meta-language to be
         defined in relation to other parameters specified in inherited
         scopes, or as eval or sprinf functions.
 
-    resources
-        \\
-
-    data
-        Resources and data are similar in that they handle extra files that
-        modules may create or use. The difference is that resources are only
-        for reading, such as pre-built lookup tables, while data can be input
-        and/or output. Compression can be automatically handled by IceProd.
-        Both resources and data are defined in the environment as strings to
-        their file location.
-
-    classes
-        This is where external software gets added. The software can be an
-        already downloaded resource or just a url to download. All python
-        files get added to the python path and binary libraries get symlinked
-        into a directory on the LD_LIBRARY_PATH. Note that if there is more
-        than one copy of the same shared library file, only the most recent
-        one is in scope. Classes are defined in the environment as strings
-        to their file location.
-
-    deletions
-        These are files that should be deleted when the scope ends.
-
-    uploads
-        These are files that should be uploaded when the scope ends.
-        Mostly Data objects that are used as output.
-
-    shell environment
-        An environment to reset to when exiting the context manager.
-
-    To keep the scope correct a new dictionary is created for every level, then
-    the inheritable objects are shallow copied (to 1 level) into the new env.
-    The deletions are not inheritable (start empty for each scope), and the shell
-    environment is set at whatever the previous scope currently has.
+    * input_files
+        A set of Data objects (urls and local paths), for files to download before
+        the task starts.
+
+    * output_files
+        A set of Data objects (urls and local paths), for files to upload after the
+        task successfully completes.
+
+    `input_files` and `output_files` are global, while `parameters` is inherited
+    at each scope level.
 
     Args:
-        cfg (:py:class:`Config`): Config object
-        obj (dict): A dict-like object from :py:mod:`iceprod.core.dataclasses`
-            such as :py:class:`iceprod.core.dataclasses.Steering`.
-        oldenv (dict): (optional) env that we are running inside
+        cfg: ConfigParser object
+        obj: A partial dataset config section to operate on. The local scope.
+        upperenv: previous scope's env output
+        logger: a logger object, for localized logging
     """
-    def __init__(self, cfg, obj, oldenv={}, logger=None):
-        self.cfg = cfg
-        self.obj = obj
-        self.oldenv = oldenv
-        self.env = {}
-        self.logger = logger if logger else logging
-
-        # validation of input
-        if not self.obj:
-            raise util.NoncriticalError('object to load environment from is empty')
-        if isinstance(self.obj, dataclasses.Steering) and not self.obj.valid():
-            raise Exception('object is not valid Steering')
-
-    async def __aenter__(self):
-        try:
-            # attempt to do depth=2 copying
-            for key in self.oldenv:
-                if key not in ('deletions','uploads','environment','pythonpath','stats'):
-                    self.env[key] = copy.copy(self.oldenv[key])
-
-            # make sure things for this env are clear (don't inherit)
-            self.env['deletions'] = []
-            self.env['uploads'] = []
-
-            # get clear environment variables
-            self.env['environment'] = os.environ.copy()
-            self.env['pythonpath'] = copy.copy(sys.path)
-
-            # inherit statistics
-            if 'stats' in self.oldenv:
-                self.env['stats'] = self.oldenv['stats']
-            else:
-                self.env['stats'] = {'upload':[], 'download':[], 'tasks':[]}
-
-            # copy parameters
-            if 'parameters' not in self.env:
-                self.env['parameters'] = {}
-            if 'parameters' in self.obj:
-                # copy new parameters to env first so local referrals work
-                self.env['parameters'].update(self.obj['parameters'])
-                # parse parameter values and update if necessary
-                for p in self.obj['parameters']:
-                    newval = self.cfg.parseValue(self.obj['parameters'][p], self.env)
-                    if newval != self.obj['parameters'][p]:
-                        self.env['parameters'][p] = newval
-
-            if 'resources' not in self.env:
-                self.env['resources'] = {}
-            if 'resources' in self.obj:
-                # download resources
-                for resource in self.obj['resources']:
-                    await downloadResource(self.env, self.cfg.parseObject(resource, self.env), logger=self.logger)
-
-            if 'data' not in self.env:
-                self.env['data'] = {}
-            input_files = self.cfg.config['options']['input'].split() if 'input' in self.cfg.config['options'] else []
-            output_files = self.cfg.config['options']['output'].split() if 'output' in self.cfg.config['options'] else []
-            if 'data' in self.obj:
-                # download data
-                for data in self.obj['data']:
-                    d = self.cfg.parseObject(data, self.env)
-                    if d['movement'] in ('input','both'):
-                        await downloadData(self.env, d, logger=self.logger)
-                        if 'local' in d and d['local']:
-                            input_files.append(d['local'])
-                        elif 'remote' in d and d['remote']:
-                            input_files.append(os.path.basename(d['remote']))
-                    if d['movement'] in ('output','both'):
-                        self.env['uploads'].append(d)
-                        if 'local' in d and d['local']:
-                            output_files.append(d['local'])
-                        elif 'remote' in d and d['remote']:
-                            output_files.append(os.path.basename(d['remote']))
-                # add input and output to parseable options
-                self.cfg.config['options']['input'] = ' '.join(input_files)
-                self.cfg.config['options']['output'] = ' '.join(output_files)
-                logging.info('input: %r', self.cfg.config['options']['input'])
-                logging.info('output: %r', self.cfg.config['options']['output'])
-
-            if 'classes' not in self.env:
-                self.env['classes'] = {}
-            if 'classes' in self.obj:
-                # set up classes
-                for c in self.obj['classes']:
-                    await setupClass(self.env, self.cfg.parseObject(c, self.env), logger=self.logger)
-
-        except util.NoncriticalError:
-            self.logger.warning('Noncritical error when setting up environment', exc_info=True)
-        except Exception:
-            self.logger.critical('Serious error when setting up environment', exc_info=True)
-            raise
-
-        return self.env
-
-    async def __aexit__(self, exc_type, exc, tb):
-        try:
-            if not exc_type:
-                # upload data if there was no exception
-                if 'uploads' in self.env and (
-                        'offline' not in self.cfg.config['options']
-                        or (not self.cfg.config['options']['offline'])
-                        or (
-                            self.cfg.config['options']['offline']
-                            and 'offline_transfer' in self.cfg.config['options']
-                            and self.cfg.config['options']['offline_transfer']
-                        )):
-                    for d in self.env['uploads']:
-                        await uploadData(self.env, d, logger=self.logger)
-        finally:
-            # delete any files
-            if 'deletions' in self.env and len(self.env['deletions']) > 0:
-                for f in reversed(self.env['deletions']):
-                    try:
-                        os.remove(f)
-                        # base = os.path.basename(f)
-                    except OSError as e:
-                        self.logger.error('failed to delete file %s - %s',(str(f),str(e)))
-                        if ('options' in self.env and
-                                'debug' in self.env['options'] and
-                                self.env['options']['debug']):
-                            raise
-
-            # reset environment
-            if 'environment' in self.env:
-                for e in list(os.environ.keys()):
-                    if e not in self.env['environment']:
-                        del os.environ[e]
-                for e in self.env['environment'].keys():
-                    os.environ[e] = self.env['environment'][e]
-
-
-async def downloadResource(env, resource, remote_base=None,
-                           local_base=None, checksum=None, logger=None):
-    if not logger:
-        logger = logging
-    """Download a resource and put location in the env"""
-    if not remote_base:
-        remote_base = env['options']['resource_url']
-    if not resource['remote'] and not resource['local']:
-        raise Exception('need to specify either local or remote')
-    if not resource['remote']:
-        url = os.path.join(remote_base, resource['local'])
-    elif functions.isurl(resource['remote']):
-        url = resource['remote']
-    else:
-        url = os.path.join(remote_base,resource['remote'])
+    if not obj:
+        raise util.NoncriticalError('object to load environment from is empty')
 
-    execute = resource.do_transfer()
-    if execute is False:
-        logger.info('not transferring file %s', url)
-        return
+    env: Env = {'parameters': {}, 'input_files': set(), 'output_files': set()}
+    if upperenv:
+        env['parameters'].update(upperenv['parameters'])
+        env['input_files'] = upperenv['input_files']
+        env['output_files'] = upperenv['output_files']
 
-    if not local_base:
-        if 'subprocess_dir' in env['options']:
-            local_base = env['options']['subprocess_dir']
+    logger = logger if logger else logging.getLogger()
+
+    try:
+        # copy parameters
+        if 'parameters' in obj:
+            # copy new parameters to env first so local referrals work
+            env['parameters'].update(obj['parameters'])
+            # parse parameter values and update if necessary
+            for p in obj['parameters']:
+                newval = cfg.parseValue(obj['parameters'][p], env)
+                if newval != obj['parameters'][p]:
+                    env['parameters'][p] = newval
+
+        if 'data' in obj:
+            # download data
+            for data in obj['data']:
+                d = cfg.parseObject(data, env)
+                if d['movement'] in ('input','both'):
+                    ret = downloadData(d, cfg=cfg, logger=logger)
+                    if ret:
+                        env['input_files'].add(ret)
+                if d['movement'] in ('output','both'):
+                    ret = uploadData(d, cfg=cfg, logger=logger)
+                    if ret:
+                        env['output_files'].add(ret)
+
+    except util.NoncriticalError:
+        logger.warning('Noncritical error when setting up environment', exc_info=True)
+    except Exception:
+        logger.critical('Serious error when setting up environment', exc_info=True)
+        raise
+
+    yield env
+
+
+@dataclass(frozen=True, slots=True)
+class Data:
+    """
+    IceProd Data instance
+
+    Args:
+        url: url location
+        local: local filename
+    """
+    url: str
+    local: str
+
+
+def storage_location(data: dict, config: dict = {}) -> str:
+    """
+    Get data storage location from the config.
+
+    Args:
+        data: data config object
+        config: dataset config
+
+    Returns:
+        storage location
+    """
+    type_ = data['type'].lower()
+    if type_ not in ['permanent', 'job_temp', 'dataset_temp', 'site_temp']:
+        raise ConfigError('data movement "type" is unknown')
+    if 'options' in config and type_ in config['options']:
+        return str(config['options'][type_])
+    elif type_ == 'permanent':
+        if 'options' in config and 'data_url' in config['options']:
+            return str(config['options']['data_url'])
         else:
-            local_base = os.getcwd()
-    if not resource['local']:
-        resource['local'] = os.path.basename(resource['remote'])
-    local = os.path.join(local_base,resource['local'])
-    if 'files' not in env:
-        env['files'] = {}
-    if not os.path.exists(os.path.dirname(local)):
-        os.makedirs(os.path.dirname(local))
-
-    # get resource
-    if resource['local'] in env['files']:
-        logger.info('resource %s already exists in env, so skip download and compression',resource['local'])
-        return
-    elif os.path.exists(local):
-        logger.info('resource %s already exists as file, so skip download',resource['local'])
+            raise ConfigError('"data_url" not defined in config["options"]')
     else:
-        # download resource
-        download_options = {}
-        if 'options' in env and 'username' in env['options']:
-            download_options['username'] = env['options']['username']
-        if 'options' in env and 'password' in env['options']:
-            download_options['password'] = env['options']['password']
-        if 'options' in env and 'ssl' in env['options'] and env['options']['ssl']:
-            download_options.update(env['options']['ssl'])
-        if 'options' in env and 'credentials' in env['options']:
-            for base_url in env['options']['credentials']:
-                if resource['remote'].startswith(base_url):
-                    logger.info('using credential for %s', base_url)
-                    cred_file = os.path.join(env['options']['credentials_dir'], env['options']['credentials'][base_url])
-                    try:
-                        with open(cred_file) as f:
-                            token = f.read()
-                    except Exception:
-                        logger.critical('failed to load credential at %s', cred_file)
-                        raise Exception('failed to download {} to {}'.format(url, local))
-                    download_options['token'] = token
-                    break
-        failed = False
-        try:
-            start_time = time.time()
-            await functions.download(url, local, options=download_options)
-            if not os.path.exists(local):
-                raise Exception('file does not exist')
-            if checksum:
-                # check the checksum
-                cksm = functions.sha512sum(local)
-                if cksm != checksum:
-                    raise Exception('checksum validation failed')
-        except Exception:
-            if execute is False or execute == 'maybe':
-                logger.info('not transferring file %s', url)
-                return
-            failed = True
-            logger.critical('failed to download %s to %s', url, local, exc_info=True)
-            raise Exception('failed to download {} to {}'.format(url, local))
-        finally:
-            stats = {
-                'name': url,
-                'error': failed,
-                'now': datetime.utcnow().isoformat(),
-                'duration': time.time()-start_time,
-            }
-            if (not failed) and os.path.exists(local):
-                stats['size'] = os.path.getsize(local)
-                stats['rate_MBps'] = stats['size']/1000/1000/stats['duration']
-
-            if 'stats' in env and (execute is True or (execute == 'maybe' and not failed)):
-                if 'download' not in env['stats']:
-                    env['stats']['download'] = []
-                env['stats']['download'].append(stats)
-
-            if (not failed) and 'data_movement_stats' in env['options'] and env['options']['data_movement_stats']:
-                print(f'{stats["now"]} Data movement stats: input {stats["duration"]:.3f} {stats["size"]:.0f} {stats["name"]}')
-
-        # check compression
-        if resource['compression'] and (functions.iscompressed(url) or functions.istarred(url)):
-            # uncompress file
-            files = functions.uncompress(local)
-            # add uncompressed file(s) to env
-            env['files'][resource['local']] = files
-        else:
-            # add file to env
-            env['files'][resource['local']] = local
-        logger.warning('resource %s added to env',resource['local'])
+        raise ConfigError(f'{type_} not defined in config["options"]')
 
 
-async def downloadData(env, data, logger=None):
-    """Download data and put location in the env"""
-    if not logger:
-        logger = logging
-    remote_base = data.storage_location(env)
-    if 'options' in env and 'subprocess_dir' in env['options']:
-        local_base = env['options']['subprocess_dir']
-    else:
-        local_base = os.getcwd()
-
-    execute = data.do_transfer()
-    checksum = None
-    if execute is not False:
-        try:
-            filecatalog = data.filecatalog(env)
-            path, checksum = filecatalog.get(data['local'])
-        except Exception:
-            # no filecatalog available
-            pass
-    await downloadResource(env, data, remote_base, local_base,
-                           checksum=checksum, logger=logger)
-
-
-async def uploadData(env, data, logger=None):
-    """Upload data"""
-    if not logger:
-        logger = logging
-    remote_base = data.storage_location(env)
-    if 'options' in env and 'subprocess_dir' in env['options']:
-        local_base = env['options']['subprocess_dir']
-    else:
-        local_base = os.getcwd()
-    if (not data['remote']) and not data['local']:
-        raise Exception('need either remote or local defined')
-    if not data['remote']:
-        url = os.path.join(remote_base, data['local'])
-    elif not functions.isurl(data['remote']):
-        url = os.path.join(remote_base, data['remote'])
-    else:
-        url = data['remote']
 
-    if not data['local']:
-        data['local'] = os.path.basename(data['remote'])
-    local = os.path.join(local_base, data['local'])
+def do_transfer(data: dict) -> bool:
+    """
+    Test if we should actually transfer the file.
+
+    Args:
+        data: data config object
+    """
+    ret = True
+    if isinstance(data['transfer'], bool):
+        ret = data['transfer']
+    elif isinstance(data['transfer'], str):
+        t = data['transfer'].lower()
+        if t in ('n', 'no', 'not', 'f', 'false'):
+            ret = False
+    elif isinstance(data['transfer'], (int, float)):
+        if data['transfer'] == 0:
+            ret = False
+    return ret
+
+
+def downloadData(data: dict, cfg: ConfigParser, logger=None) -> Optional[Data]:
+    """
+    Parse download url and local filename.
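+
+    A minimal sketch, assuming the dataset config sets
+    ``options['data_url']`` so :func:`storage_location` can resolve the
+    remote base for ``permanent`` data::
+
+        d = {'remote': '', 'local': 'input.i3', 'type': 'permanent',
+             'movement': 'input', 'transfer': True}
+        downloadData(d, cfg)  # -> Data(url=data_url + '/input.i3', local='input.i3')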
-    execute = data.do_transfer()
-    exists = os.path.exists(local)
-    if execute is False or (execute == 'maybe' and not exists):
-        logger.info('not transferring file %s', local)
-        return
-    elif not exists:
-        raise Exception('file {} does not exist'.format(local))
-
-    # check compression
-    if data['compression']:
-        # get compression type, if specified
-        if ((functions.iscompressed(url) or functions.istarred(url)) and
-                not (functions.iscompressed(local) or functions.istarred(local))):
-            # url has compression on it, so use that
-            if '.tar.' in url:
-                c = '.'.join(url.rsplit('.',2)[-2:])
-            else:
-                c = url.rsplit('.',1)[-1]
-            try:
-                local = functions.compress(local,c)
-            except Exception:
-                logger.warning('cannot compress file %s to %s', local, c)
-                raise
-
-    # upload file
-    upload_options = {}
-    if 'options' in env and 'username' in env['options']:
-        upload_options['username'] = env['options']['username']
-    if 'options' in env and 'password' in env['options']:
-        upload_options['password'] = env['options']['password']
-    if 'options' in env and 'ssl' in env['options'] and env['options']['ssl']:
-        upload_options.update(env['options']['ssl'])
-    if 'options' in env and 'credentials' in env['options']:
-        for base_url in env['options']['credentials']:
-            if data['remote'].startswith(base_url):
-                logger.info('using credential for %s', base_url)
-                cred_file = os.path.join(env['options']['credentials_dir'], env['options']['credentials'][base_url])
-                try:
-                    with open(cred_file) as f:
-                        token = f.read()
-                except Exception:
-                    logger.critical('failed to load credential at %s', cred_file)
-                    raise Exception('failed to upload {} to {}'.format(url, local))
-                upload_options['token'] = token
-                break
-    do_checksum = True
-    if 'options' in env and 'upload_checksum' in env['options']:
-        do_checksum = env['options']['upload_checksum']
-    failed = False
-    try:
-        start_time = time.time()
-        await functions.upload(local, url, checksum=do_checksum, options=upload_options)
-    except Exception:
-        failed = True
-        logger.critical('failed to upload %s to %s', local, url, exc_info=True)
-        raise Exception('failed to upload {} to {}'.format(local, url))
-    finally:
-        stats = {
-            'name': url,
-            'error': failed,
-            'now': datetime.utcnow().isoformat(),
-            'duration': time.time()-start_time,
-        }
-        if not failed:
-            stats['size'] = os.path.getsize(local)
-            stats['rate_MBps'] = stats['size']/1000/1000/stats['duration']
-        if 'stats' in env:
-            env['stats']['upload'].append(stats)
-
-    if (not failed) and 'data_movement_stats' in env['options'] and env['options']['data_movement_stats']:
-        print(f'{stats["now"]} Data movement stats: output {stats["duration"]:.3f} {stats["size"]:.0f} {stats["name"]}')
-
-    # if successful, add to filecatalog
-    try:
-        filecatalog = data.filecatalog(env)
-    except Exception:
-        pass  # no filecatalog available
-    else:
-        try:
-            cksm = functions.sha512sum(local)
-            metadata = {
-                'file_size': stats['size'],
-                'create_date': stats['now'],
-                'modify_date': stats['now'],
-                'data_type': 'simulation',
-                'transfer_duration': stats['duration'],
-                'transfer_MBps': stats['rate_MBps'],
-            }
-            options = ('dataset','dataset_id','task_id','task','job','debug')
-            metadata.update({env['options'][k] for k in options if k in env['options']})
-            filecatalog.add(data['local'], url, cksm, metadata)
-        except Exception:
-            logger.warning('failed to add %r to filecatalog', url, exc_info=True)
-
-
-async def setupClass(env, class_obj, logger=None):
-    """Set up a class for use in modules, and put it in the env"""
+
+    Args:
+        data: data config object
+        cfg: config parser
+
+    Returns:
+        either None or a Data object
+    """
     if not logger:
         logger = logging
-    if 'classes' not in env:
-        env['classes'] = {}
-    if not class_obj:
-        raise Exception('Class is not defined')
-    loaded = False
-    if class_obj['name'] in env['classes']:
-        # class already loaded, so leave it alone
-        logger.info('class %s already loaded',class_obj['name'])
-    elif class_obj['resource_name']:
-        # class is downloaded as a resource
-        if 'files' not in env or class_obj['resource_name'] not in env['files']:
-            logger.error('resource %s for class %s does not exist',
-                         class_obj['resource_name'],class_obj['name'])
-        else:
-            local = env['files'][class_obj['resource_name']]
-            if not isinstance(local,dataclasses.String):
-                local = local[0]
-            if class_obj['src'] and os.path.exists(os.path.join(local,class_obj['src'])):
-                # treat src as a path inside the resource
-                local = os.path.join(local,class_obj['src'])
-            loaded = True
+    remote_base = storage_location(data, cfg.config)
+    remote = str(data['remote']) if data['remote'] is not None else ''
+    local = str(data['local']) if data['local'] is not None else ''
+
+    if not remote and not local:
+        raise ConfigError('need either "remote" or "local" defined for data')
+    if not remote:
+        url = os.path.join(remote_base, local)
+    elif functions.isurl(remote):
+        url = remote
     else:
-        # get url of class
-        i = 0
-        while True:
-            url = class_obj['src']
-            if url and functions.isurl(url):
-                i = 10  # skip repeat download attempts
-            else:
-                if i == 0:
-                    # first, look in resources
-                    if 'options' in env and 'resource_url' in env['options']:
-                        url = os.path.join(env['options']['resource_url'],class_obj['src'])
-                    else:
-                        url = os.path.join('http://prod-exe.icecube.wisc.edu/',class_obj['src'])
-                elif i == 1:
-                    # then, look in regular svn
-                    if 'options' in env and 'svn_repository' in env['options']:
-                        url = os.path.join(env['options']['svn_repository'],class_obj['src'])
-                    else:
-                        url = os.path.join('http://code.icecube.wisc.edu/svn/projects/',class_obj['src'])
-                else:
-                    raise util.NoncriticalError('Cannot find class %s because of bad src url'%class_obj['name'])
+        url = os.path.join(remote_base, remote)
 
-            if 'options' in env and 'local_temp' in env['options']:
-                local_temp = env['options']['local_temp']
-            else:
-                local_temp = os.path.join(os.getcwd(),'classes')
-                env['options']['local_temp'] = local_temp
-            if not os.path.exists(local_temp):
-                os.makedirs(local_temp)
-            if 'PYTHONPATH' in os.environ and local_temp not in os.environ['PYTHONPATH']:
-                os.environ['PYTHONPATH'] += ':'+local_temp
-            elif 'PYTHONPATH' not in os.environ:
-                os.environ['PYTHONPATH'] = local_temp
-
-            local = os.path.join(local_temp,class_obj['name'].replace(' ','_'))
-
-            download_options = {}
-            if 'options' in env and 'username' in env['options']:
-                download_options['username'] = env['options']['username']
-            if 'options' in env and 'password' in env['options']:
-                download_options['password'] = env['options']['password']
-            if 'options' in env and 'ssl' in env['options'] and env['options']['ssl']:
-                download_options.update(env['options']['ssl'])
+    execute = do_transfer(data)
+    if execute is False:
+        logger.info('not transferring file %s', url)
+        return
+    if not local:
+        local = os.path.basename(remote)
+
+    return Data(url, local)
 
-            # download class
-            logger.warning('attempting to download class %s to %s',url,local_temp)
-            try:
-                download_local = await functions.download(url, local_temp, options=download_options)
-            except Exception:
-                logger.info('download failed, {} attempts left'.format(i), exc_info=True)
-                if i < 10:
-                    i += 1
-                    continue  # retry with different url
-                raise
-            if not os.path.exists(download_local):
-                raise Exception('failed to download {} to {}'.format(url, local))
-            if functions.iscompressed(download_local) or functions.istarred(download_local):
-                files = functions.uncompress(download_local, out_dir=local_temp)
-                # check if we extracted a tarfile
-                if isinstance(files,dataclasses.String):
-                    local = files
-                elif isinstance(files,list):
-                    dirname = os.path.commonprefix(files)
-                    if dirname:
-                        dirname = os.path.join(local_temp, dirname.split(os.path.sep)[0])
-                    else:
-                        dirname = local_temp
-                    logger.info('looking up tarball at %r', dirname)
-                    if os.path.isdir(dirname):
-                        logger.info('rename %r to %r', local, dirname)
-                        local = dirname
-                else:
-                    logger.warning('files is strange datatype: %r', type(files))
-            elif local != download_local:
-                logger.info('rename %r to %r', download_local, local)
-                os.rename(download_local, local)
-            loaded = True
-            break
-
-    if loaded:
-        # add to env
-        env['classes'][class_obj['name']] = local
-        logger.warning('class %s loaded at %r',class_obj['name'],local)
-
-        # add binary libraries to the LD_LIBRARY_PATH
-        def ldpath(root,f=None):
-            root = os.path.abspath(root)
-
-            def islib(f):
-                return f[-3:] == '.so' or '.so.' in f or f[-2:] == '.a' or '.a.' in f
-            if (f and islib(f)) or any(islib(f) for f in os.listdir(root)):
-                logger.info('adding to LD_LIBRARY_PATH: %s',root)
-                if 'LD_LIBRARY_PATH' in os.environ:
-                    if root in os.environ['LD_LIBRARY_PATH'].split(':'):
-                        return  # already present
-                    os.environ['LD_LIBRARY_PATH'] = root+':'+os.environ['LD_LIBRARY_PATH']
-                else:
-                    os.environ['LD_LIBRARY_PATH'] = root
-            else:
-                logger.debug('no libs in %s',root)
-
-        def addToPythonPath(root):
-            if glob.glob(os.path.join(root,'*.py')):
-                logger.info('adding to PYTHONPATH: %s',root)
-                if 'PYTHONPATH' in os.environ:
-                    if root in os.environ['PYTHONPATH'].split(':'):
-                        return  # already present
-                    os.environ['PYTHONPATH'] = root+':'+os.environ['PYTHONPATH']
-                else:
-                    os.environ['PYTHONPATH'] = root
-            else:
-                logger.debug('no python files: %s',root)
-
-        if os.path.isdir(local):
-            # build search list
-            search_list = [local]
-            search_list.extend(glob.glob(os.path.join(local,'lib*')))
-            search_list.extend(glob.glob(os.path.join(local,'lib*/python*/*-packages')))
-            if class_obj['libs'] is not None:
-                search_list.extend(os.path.join(local,x) for x in class_obj['libs'].split(':'))
-            for s in search_list:
-                if not os.path.isdir(s):
-                    continue
-                addToPythonPath(s)
-                ldpath(s)
-        elif os.path.exists(local):
-            root, f = os.path.split(local)
-            if f.endswith('.py'):
-                if root not in sys.path:
-                    addToPythonPath(root)
-            else:
-                # check for binary library
-                ldpath(root,f)
-        # modify environment variables
-        logger.info('env_vars = %s',class_obj['env_vars'])
-        if class_obj['env_vars']:
-            for e in class_obj['env_vars'].split(';'):
-                try:
-                    k,v = e.split('=')
-                except ValueError as e:
-                    logger.warning('bad env variable: %s',e)
-                    continue
-                v = v.replace('$CLASS',local)
-                logger.info('setting envvar: %s = %s',k,v)
-                if k in os.environ:
-                    os.environ[k] = v+':'+os.environ[k]
-                else:
-                    os.environ[k] = v
+
+
+def uploadData(data: dict, cfg: ConfigParser, logger=None) -> Optional[Data]:
+    """
+    Parse upload url and local filename.
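+
+    A minimal sketch, assuming ``options['data_url']`` is set in the
+    dataset config::
+
+        d = {'remote': '', 'local': 'output.i3', 'type': 'permanent',
+             'movement': 'output', 'transfer': True}
+        uploadData(d, cfg)  # -> Data(url=data_url + '/output.i3', local='output.i3')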
+
+    Args:
+        data: data config object
+        cfg: config parser
 
-async def runtask(cfg, globalenv, task, logger=None):
-    """Run the specified task"""
-    if not task:
-        raise Exception('No task provided')
+    Returns:
+        either None or a Data object
+    """
     if not logger:
         logger = logging
+    remote_base = storage_location(data, cfg.config)
+    remote = str(data['remote']) if data['remote'] is not None else ''
+    local = str(data['local']) if data['local'] is not None else ''
+
+    if not remote and not local:
+        raise ConfigError('need either "remote" or "local" defined for data')
+    if not remote:
+        url = os.path.join(remote_base, local)
+    elif not functions.isurl(remote):
+        url = os.path.join(remote_base, remote)
+    else:
+        url = remote
 
-    # set up task_temp
-    if not os.path.exists('task_temp'):
-        os.mkdir('task_temp')
-    globalenv['task_temp'] = os.path.join(os.getcwd(),'task_temp')
+    if not local:
+        local = os.path.basename(remote)
 
-    # set up stats
-    stats = {}
+    execute = do_transfer(data)
+    if execute is False:
+        logger.info('not transferring file %s', local)
+        return
 
-    # check if we have any files in the task_files API
-    if task['task_files'] and ((not cfg.config['options']['offline']) or cfg.config['options']['offline_transfer']):
-        files = await cfg.rpc.task_files(cfg.config['options']['dataset_id'],
-                                         cfg.config['options']['task_id'])
-        task['data'].extend(files)
+    return Data(url, local)
 
-    try:
-        # set up local env
-        async with SetupEnv(cfg, task, globalenv, logger=logger) as env:
-            # run trays
-            for tray in task['trays']:
-                tmpstat = {}
-                async for proc in runtray(cfg, env, tray, stats=tmpstat, logger=logger):
-                    yield proc
-                if len(tmpstat) > 1:
-                    stats[tray['name']] = tmpstat
-                elif len(tmpstat) == 1:
-                    stats[tray['name']] = tmpstat[list(tmpstat.keys())[0]]
-    finally:
-        # destroy task temp
-        try:
-            functions.removedirs('task_temp')
-        except Exception as e:
-            logger.warning('error removing task_temp directory: %r',
-                           e, exc_info=True)
-
-    globalenv['stats']['tasks'].append(stats)
-
-
-async def runtray(cfg, globalenv,tray,stats={}, logger=None):
-    """Run the specified tray"""
-    if not tray:
-        raise Exception('No tray provided')
-    if not logger:
-        logger = logging
 
 # Run Functions #
 
-    # set up tray_temp
-    if not os.path.exists('tray_temp'):
-        os.mkdir('tray_temp')
-    globalenv['tray_temp'] = os.path.join(os.getcwd(),'tray_temp')
 
-    # run iterations
-    try:
-        tmpenv = globalenv.copy()
-        for i in range(tray['iterations']):
-            # set up local env
-            cfg.config['options']['iter'] = i
-            tmpstat = {}
-            async with SetupEnv(cfg, tray, tmpenv, logger=logger) as env:
-                # run modules
-                for module in tray['modules']:
-                    async for proc in runmodule(cfg, env, module, stats=tmpstat, logger=logger):
-                        yield proc
-            stats[i] = tmpstat
-
-    finally:
-        # destroy tray temp
-        try:
-            functions.removedirs('tray_temp')
-        except Exception as e:
-            logger.warning('error removing tray_temp directory: %s',
-                           str(e), exc_info=True)
-
-
-async def runmodule(cfg, globalenv, module, stats={}, logger=None):
-    """Run the specified module"""
-    if not module:
-        raise Exception('No module provided')
-    if not logger:
-        logger = logging
+class WriteToScript:
+    """
+    Write a task to a Bash script, to execute manually.
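+
+    A usage sketch (the ``task`` object and work directory are assumed to
+    come from the caller)::
+
+        writer = WriteToScript(task, Path('/path/to/workdir'))
+        scriptname = await writer.convert()
+        # writer.infiles / writer.outfiles now hold the Data objects to
+        # transfer before / after running the script
+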
+    Args:
+        task: a task object, with dataset config
+        workdir: a directory to write the task and any related files
+        options: extra dataset config options
+        logger: a logger object, for localized logging
+    """
+    def __init__(self, task: config.Task, workdir: Path, options: Optional[dict] = None, logger: Optional[logging.Logger] = None):
+        self.task = task
+        self.workdir = workdir
+        self.logger = logger if logger else logging.getLogger()
+
+        # default config setup
+        self.options = self.task.dataset.config['options']
+        self._fill_options()
+        if options:
+            self.options.update(options)
+        self.cfgparser = ConfigParser(self.task.dataset, logger=self.logger)
+
+        # set up script
+        self.infiles: set[Data] = set()
+        self.outfiles: set[Data] = set()
+
+    def _fill_options(self):
+        self.options['dataset_id'] = self.task.dataset.dataset_id
+        self.options['dataset'] = self.task.dataset.dataset_num
+        self.options['job'] = self.task.job.job_index
+        self.options['jobs_submitted'] = self.task.dataset.jobs_submitted
+        self.options['task_id'] = self.task.task_id
+        self.options['task'] = self.task.name
+        self.options['debug'] = self.task.dataset.debug
+
+    async def convert(self):
+        scriptname = self.workdir / 'task_runner.sh'
+        with open(scriptname, 'w') as f:
+            print('#!/bin/sh', file=f)
+            print('set -e', file=f)
+            add_default_options(self.options)
+            print('# Options:', file=f)
+            for field in self.options:
+                print(f'#   {field}={self.options[field]}', file=f)
+            print('', file=f)
+            with scope_env(self.cfgparser, self.task.dataset.config['steering'], logger=self.logger) as globalenv:
+                task = self.task.get_task_config()
+                self.logger.debug('converting task %s', self.task.name)
+                with scope_env(self.cfgparser, task, globalenv, logger=self.logger) as taskenv:
+                    for i, tray in enumerate(task['trays']):
+                        trayname = tray['name'] if tray.get('name', '') else i
+                        for iteration in range(tray['iterations']):
+                            self.options['iter'] = iteration
+                            self.logger.debug('converting tray %r iter %d', trayname, iteration)
+                            print(f'# running tray {trayname}, iter {iteration}', file=f)
+                            with scope_env(self.cfgparser, tray, taskenv, logger=self.logger) as trayenv:
+                                for j, module in enumerate(tray['modules']):
+                                    modulename = module['name'] if module.get('name', '') else j
+                                    self.logger.debug('converting module %r', modulename)
+                                    print(f'# running module {modulename}', file=f)
+                                    with scope_env(self.cfgparser, module, trayenv, logger=self.logger) as moduleenv:
+                                        await self._write_module(module, moduleenv, file=f)
+                                    print('', file=f)
+
+        self.infiles = globalenv['input_files']
+        self.outfiles = globalenv['output_files']
+
+        return scriptname
+
+    async def _write_module(self, module, env, file):
+        module = module.copy()
-    # set up local env
-    module = module.copy()
-    async with SetupEnv(cfg, module, globalenv, logger=logger) as env:
         if module['running_class']:
-            module['running_class'] = cfg.parseValue(module['running_class'],env)
+            module['running_class'] = self.cfgparser.parseValue(module['running_class'], env)
         if module['args']:
-            module['args'] = cfg.parseObject(module['args'],env)
+            module['args'] = self.cfgparser.parseObject(module['args'], env)
         if module['src']:
-            module['src'] = cfg.parseValue(module['src'],env)
+            module['src'] = self.cfgparser.parseValue(module['src'], env)
         if module['env_shell']:
-            module['env_shell'] = cfg.parseValue(module['env_shell'],env)
+            module['env_shell'] = self.cfgparser.parseValue(module['env_shell'], env)
         if module['configs']:
             # parse twice to make sure it's parsed, even if it starts as a string
-            module['configs'] = cfg.parseObject(module['configs'],env)
-            module['configs'] = cfg.parseObject(module['configs'],env)
-
-        # make subprocess to run the module
-        async with ForkModule(cfg, env, module, logger=logger, stats=stats) as process:
-            # yield process back to pilot or driver, so it can be killed
-            yield process
+            module['configs'] = self.cfgparser.parseObject(module['configs'], env)
+            module['configs'] = self.cfgparser.parseObject(module['configs'], env)
 
 
-class ForkModule:
-    """
-    Modules are run in a forked process to prevent segfaults from killing IceProd.
-    Their stdout and stderr is dumped into the log file with prefixes on each
-    line to designate its source. Any error or the return value is returned to
-    the main process via a Queue.
-
-    If a module defines a src, that is assumed to be a Class which should be
-    added to the env. The running_class is where the exact script or binary
-    is chosen. It can match several things:
-
-    * A fully defined python module.class import (also takes module.function)
-    * A python class defined in the src provided
-    * A regular python script
-    * An executable of some type (this is run in a subprocess with shell
-      execution disabled)
-    """
-    def __init__(self, cfg, env, module, logger=None, stats=None):
-        self.cfg = cfg
-        self.env = env
-        self.module = module
-        if not logger:
-            logger = logging
-        self.logger = logger
-        self.stats = stats if stats else {}
-        self.proc = None
-
-        self.error_filename = constants['task_exception']
-        self.stats_filename = constants['stats']
-        if 'subprocess_dir' in cfg.config['options'] and cfg.config['options']['subprocess_dir']:
-            subdir = cfg.config['options']['subprocess_dir']
-            self.error_filename = os.path.join(subdir, self.error_filename)
-            self.stats_filename = os.path.join(subdir, self.stats_filename)
-
-        if os.path.exists(self.error_filename):
-            os.remove(self.error_filename)
-
-    async def __aenter__(self):
         module_src = None
-        if self.module['src']:
-            if not functions.isurl(self.module['src']):
-                module_src = self.module['src']
-            else:
-                # get script to run
-                c = dataclasses.Class()
-                c['src'] = self.module['src']
-                c['name'] = os.path.basename(c['src'])
-                if '?' in c['name']:
-                    c['name'] = c['name'][:c['name'].find('?')]
-                elif '#' in c['name']:
-                    c['name'] = c['name'][:c['name'].find('#')]
-                await setupClass(self.env,c,logger=self.logger)
-                if c['name'] not in self.env['classes']:
-                    raise Exception('Failed to install class %s'%c['name'])
-                module_src = self.env['classes'][c['name']]
+        if module['src']:
+            module_src = module['src']
+            if functions.isurl(module_src):
+                path = os.path.basename(module_src).split('?')[0].split('#')[0]
+                env['input_files'].add(Data(module_src, path))
+                module_src = path
 
         # set up env_shell
-        env_shell = None
-        if self.module['env_shell']:
-            env_shell = self.module['env_shell'].split()
-            self.logger.info('searching for env_shell at %r', env_shell[0])
-            if not os.path.exists(env_shell[0]):
-                env_class = env_shell[0].split('/')[0]
-                self.logger.info('searching for env_shell as %r class', env_class)
-                if env_class in self.env['classes']:
-                    env_tmp = env_shell[0].split('/')
-                    env_tmp[0] = self.env['classes'][env_class]
-                    env_shell[0] = '/'.join(env_tmp)
-                else:
-                    self.logger.info('attempting to download env_shell')
-                    c = dataclasses.Class()
-                    c['src'] = env_shell[0]
-                    c['name'] = os.path.basename(c['src'])
-                    await setupClass(self.env,c,logger=self.logger)
-                    if c['name'] not in self.env['classes']:
-                        raise Exception('Failed to install class %s'%c['name'])
-                    env_shell[0] = self.env['classes'][c['name']]
+        env_shell = []
+        if module['env_shell']:
+            env_shell = module['env_shell'].split()
+            if functions.isurl(env_shell[0]):
+                path = os.path.basename(env_shell[0]).split('?')[0].split('#')[0]
+                env['input_files'].add(Data(env_shell[0], path))
+                env_shell[0] = f'./{path}'
 
         if module_src:
-            self.logger.warning('running module \'%s\' with src %s',
-                                self.module['name'], module_src)
+            self.logger.info('running module %r with src %s', module['name'], module_src)
         else:
-            self.logger.warning('running module \'%s\' with class %s',
-                                self.module['name'], self.module['running_class'])
+            self.logger.info('running module %r with class %s', module['name'], module['running_class'])
 
         # set up the args
-        args = self.module['args']
-        if args:
-            self.logger.warning('args=%s',args)
-            if args and isinstance(args,dataclasses.String) and args[0] in ('{','['):
+        args = module['args']
+        if args is not None and args != '':
+            self.logger.warning('args=%s', args)
+            if args and isinstance(args, str) and args[0] in ('{', '['):
                 args = json_decode(args)
-            if args and isinstance(args, dict) and set(args) == {'args','kwargs'}:
-                args = self.cfg.parseObject(args, self.env)
-            elif isinstance(args,dataclasses.String):
-                args = {"args":[self.cfg.parseValue(x,self.env) for x in args.split()],"kwargs":{}}
-            elif isinstance(args,list):
-                args = {"args":[self.cfg.parseValue(x,self.env) for x in args],"kwargs":{}}
-            elif isinstance(args,dict):
-                args = {"args":[],"kwargs":self.cfg.parseObject(args,self.env)}
+            if args and isinstance(args, dict) and set(args) == {'args', 'kwargs'}:
+                args = self.cfgparser.parseObject(args, env)
+            elif isinstance(args, str):
+                args = {"args": [self.cfgparser.parseValue(x, env) for x in args.split()], "kwargs": {}}
+            elif isinstance(args, list):
+                args = {"args": [self.cfgparser.parseValue(x, env) for x in args], "kwargs": {}}
+            elif isinstance(args, dict):
+                args = {"args": [], "kwargs": self.cfgparser.parseObject(args, env)}
             else:
-                raise Exception('args is unknown type')
+                args = {"args": [str(args)], "kwargs": {}}
+
+            # convert to cmdline args
+            def splitter(a,b):
+                ret = ('-%s' if len(str(a)) <= 1 else '--%s')%str(a)
+                if b is None:
+                    return ret
+                else:
+                    return ret+'='+str(b)
+            args = args['args'] + [splitter(a, args['kwargs'][a]) for a in args['kwargs']]
+
+            # force args to string
+            def toStr(a):
+                if isinstance(a,(bytes,str)):
+                    return a
+                else:
+                    return str(a)
+            args = [toStr(a) for a in args]
+        else:
+            args = []
 
         # set up the environment
         cmd = []
         if env_shell:
             cmd.extend(env_shell)
-        kwargs = {'close_fds': True}
-        if 'subprocess_dir' in self.cfg.config['options'] and self.cfg.config['options']['subprocess_dir']:
-            subdir = self.cfg.config['options']['subprocess_dir']
-            if not os.path.exists(subdir):
-                os.makedirs(subdir)
-            kwargs['cwd'] = subdir
-        else:
-            kwargs['cwd'] = os.getcwd()
-        self.stdout = open(os.path.join(kwargs['cwd'], constants['stdout']), 'ab')
-        self.stderr = open(os.path.join(kwargs['cwd'], constants['stderr']), 'ab')
-        kwargs['stdout'] = self.stdout
-        kwargs['stderr'] = self.stderr
 
         # set up configs
-        if self.module['configs']:
-            for filename in self.module['configs']:
+        if module['configs']:
+            for filename in module['configs']:
                 self.logger.info('creating config %r', filename)
-                with open(os.path.join(kwargs['cwd'], filename),'w') as f:
-                    f.write(json_encode(self.module['configs'][filename]))
+                with open(self.workdir / filename, 'w') as f:
+                    f.write(json_encode(module['configs'][filename]))
+                env['input_files'].add(Data(str(self.workdir / filename), filename))
 
         # run the module
-        if self.module['running_class']:
-            self.logger.info('run as a class using the helper script')
-            exe_helper = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                      'exe_helper.py')
-            cmd.extend(['python', exe_helper, '--classname',
-                        self.module['running_class']])
-            if self.env['options']['debug']:
-                cmd.append('--debug')
-            if module_src:
-                cmd.extend(['--filename', module_src])
-            if args:
-                args_filename = constants['args']
-                if 'cwd' in kwargs:
-                    args_filename = os.path.join(kwargs['cwd'], args_filename)
-                with open(args_filename,'w') as f:
-                    f.write(json_encode(args))
-                cmd.append('--args')
+        if module['running_class']:
+            self.logger.info('run as a python module')
+            cmd.extend(['python', '-m', module['running_class']] + args)
         elif module_src:
             self.logger.info('run as a script directly')
-            if args:
-                def splitter(a,b):
-                    ret = ('-%s' if len(str(a)) <= 1 else '--%s')%str(a)
-                    if b is None:
-                        return ret
-                    else:
-                        return ret+'='+str(b)
-                args = args['args']+[splitter(a,args['kwargs'][a]) for a in args['kwargs']]
-
-                # force args to string
-                def toStr(a):
-                    if isinstance(a,(bytes,str)):
-                        return a
-                    else:
-                        return str(a)
-                args = [toStr(a) for a in args]
-            else:
-                args = []
-
-            shebang = False
-            if os.path.exists(module_src):
-                try:
-                    with open(module_src) as f:
-                        if f.read(10).startswith('#!'):
-                            # shebang found
-                            mode = os.stat(module_src).st_mode
-                            if not (mode & stat.S_IXUSR):
-                                os.chmod(module_src, mode | stat.S_IXUSR)
-                            shebang = True
-                except Exception:
-                    self.logger.warning('cannot get shebang for %s', module_src,
-                                        exc_info=True)
-
-            if (not shebang) and module_src[-3:] == '.py':
+            if module_src[-3:] == '.py':
                 # call as python script
-                cmd.extend(['python', module_src]+args)
-            elif (not shebang) and module_src[-3:] == '.sh':
+                cmd.extend(['python', module_src] + args)
+            elif module_src[-3:] == '.sh':
                 # call as shell script
-                cmd.extend(['/bin/sh', module_src]+args)
+                cmd.extend(['/bin/sh', module_src] + args)
             else:
                 # call as regular executable
-                cmd.extend([module_src]+args)
+                if module_src[0] != '/':
+                    module_src = f'./{module_src}'
+                cmd.extend([module_src] + args)
         else:
             self.logger.error('module is missing class and src')
-            raise Exception('error running module')
Exception('error running module') + raise ConfigError('error running module - need either "class" or "src"') - self.logger.warning('subprocess cmd=%r',cmd) - if self.module['env_clear']: + if module['env_clear']: # must be on cvmfs-like environ for this to apply - env = {'PYTHONNOUSERSITE':'1'} - if 'SROOT' in os.environ: - prefix = os.environ['SROOT'] - elif 'ICEPRODROOT' in os.environ: - prefix = os.environ['ICEPRODROOT'] - else: - prefix = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - for k in os.environ: - if k in ('OPENCL_VENDOR_PATH','http_proxy','TMP','TMPDIR','_CONDOR_SCRATCH_DIR'): - # pass through unchanged - env[k] = os.environ[k] - elif ('sroot' in k.lower() or 'iceprod' in k.lower() or - k in ('CUDA_VISIBLE_DEVICES','COMPUTE','GPU_DEVICE_ORDINAL')): - # don't pass these at all - pass - else: - # filter SROOT out of environ - ret = [x for x in os.environ[k].split(':') if x.strip() and (not x.startswith(prefix)) and 'iceprod' not in x.lower()] - if ret: - env[k] = ':'.join(ret) - # handle resource environment - if 'resources' in self.cfg.config['options']: - Resources.set_env(self.cfg.config['options']['resources'], env) - self.logger.warning('env = %r', env) - kwargs['env'] = env - - self.proc = await asyncio.create_subprocess_exec(*cmd, **kwargs) - return self.proc - - async def __aexit__(self, exc_type, exc, tb): - try: - self.stdout.close() - self.stderr.close() - except Exception: - pass - if not exc_type: - # now clean up after process - if self.proc and self.proc.returncode: - self.logger.warning('return code: {}'.format(self.proc.returncode)) - try: - with open(self.error_filename, 'rb') as f: - e = pickle.load(f) - except Exception: - self.logger.warning('cannot load exception info from failed module', ) - raise Exception('module failed') - else: - if isinstance(e, Exception): - raise e - else: - raise Exception(str(e)) - - # get stats, if available - if os.path.exists(self.stats_filename): - try: - new_stats = pickle.load(open(self.stats_filename, 'rb')) - if self.module['name']: - self.stats[self.module['name']] = new_stats - else: - self.stats.update(new_stats) - except Exception: - self.logger.warning('cannot load stats info from module') + envstr = 'env -i PYTHONNOUSERSITE=1 ' + for k in ('OPENCL_VENDOR_PATH', 'http_proxy', 'TMP', 'TMPDIR', '_CONDOR_SCRATCH_DIR', 'CUDA_VISIBLE_DEVICES', 'COMPUTE', 'GPU_DEVICE_ORDINAL'): + envstr += f'{k}=${k} ' + cmd = envstr.split()+cmd + + self.logger.info('cmd=%r',cmd) + print(' '.join(cmd), file=file) diff --git a/iceprod/core/exe_helper.py b/iceprod/core/exe_helper.py deleted file mode 100644 index 64446d1b..00000000 --- a/iceprod/core/exe_helper.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Help run class-based modules, including iceprod modules. - -This is run in a subprocess to help set up the environment, -as well as contain any crashes. - -Note that this file should be backward-compatible with python 2.7+, -as it will be run under the user's environment. 
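At its core the helper resolved a dotted class path, imported it, and called
it with the decoded arguments. A minimal sketch of that dispatch, assuming a
dotted name such as mypkg.MyClass (the helper name and defaults here are
illustrative, not the module's API)::

    import importlib

    def run_class(classname, args=(), kwargs=None):
        # split 'mypkg.MyClass' into module path and attribute name,
        # import the module, then call the attribute with the given args
        modname, clsname = classname.rsplit('.', 1)
        cls = getattr(importlib.import_module(modname), clsname)
        return cls(*args, **(kwargs or {}))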
-""" - -from __future__ import absolute_import, division, print_function - -import os -import imp -import inspect -import logging -import importlib -try: - from collections.abc import Iterable -except ImportError: - from collections import Iterable - -try: - import cPickle as pickle -except Exception: - import pickle - -from json import loads as json_decode - -# from iceprod.core import constants -constants_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),'__init__.py') -try: - spec = importlib.util.spec_from_file_location('constants', constants_path) - constants_mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(constants_mod) -except AttributeError: - constants_mod = imp.load_source('constants',constants_path) -constants = constants_mod.constants - -try: - String = basestring -except NameError: - String = str - - -def get_args(): - """Read json of [args, kwargs] from the std args file""" - with open(constants['args']) as f: - data = f.read() - logging.debug('get_args raw: %r',data) - return json_decode(data) - - -def unicode_to_ascii(obj): - if isinstance(obj,String): - return str(obj) - elif isinstance(obj,dict): - ret = {} - for k in obj: - ret[unicode_to_ascii(k)] = unicode_to_ascii(obj[k]) - return ret - elif isinstance(obj,set): - return set(unicode_to_ascii(k) for k in obj) - elif isinstance(obj,Iterable): - return [unicode_to_ascii(k) for k in obj] - else: - return obj - - -def run(classname, filename=None, args=False, debug=False): - logging.basicConfig(level=logging.DEBUG if debug else logging.WARN) - logging.warning('exe_helper(%s)', classname) - - if not classname: - raise Exception('classname is missing') - class_args = {'args':[],'kwargs':{}} - if args: - class_args = get_args() - logging.info('args: %r', class_args) - class_args = unicode_to_ascii(class_args) - parts = classname.rsplit('.',1) - if len(parts) == 1: - p,cl = os.path.basename(filename),parts[0] - else: - p,cl = parts - if filename: - logging.info('try loading from source: %s', filename) - mod = imp.load_source(p, filename) - class_obj = getattr(mod,cl) - else: - logging.info('try regular import: %s.%s', p, cl) - mod = __import__(p,globals(),locals(),[cl]) - class_obj = getattr(mod,cl) - - if (inspect.isclass(class_obj) and - any(True for c in inspect.getmro(class_obj) if c.__name__ == 'IPBaseClass')): - logging.info('IceProd v1 class') - instance = class_obj() - for k in class_args['kwargs']: - instance.SetParameter(k,class_args['kwargs'][k]) - stats = {} - ret = instance.Execute(stats) - if stats: - pickle.dump(stats,open(constants['stats'],'wb')) - if ret: - raise Exception('Execute() returned %r'%ret) - else: - logging.info('regular callable') - class_obj(*class_args['args'],**class_args['kwargs']) - - -def main(): - import argparse - parser = argparse.ArgumentParser(description='IceProd Core') - parser.add_argument('-c','--classname', type=str, default=None, - help='Specify class name') - parser.add_argument('-f','--filename', type=str, default=None, - help='Specify file to find the class in') - parser.add_argument('-a','--args', action='store_true', default=False, - help='Enable arg file detection') - parser.add_argument('-d','--debug', action='store_true', default=False, - help='Enable debug actions and logging') - args = vars(parser.parse_args()) - try: - run(**args) - except Exception as e: - with open(constants['task_exception'],'wb') as f: - pickle.dump(e,f) - raise - - -if __name__ == '__main__': - main() diff --git a/iceprod/core/pilot.py b/iceprod/core/pilot.py 
deleted file mode 100644 index e1700e92..00000000 --- a/iceprod/core/pilot.py +++ /dev/null @@ -1,583 +0,0 @@ -"""Pilot functionality""" - -from __future__ import absolute_import, division, print_function - -import os -import sys -import math -import time -import logging -import tempfile -import shutil -import random -from datetime import datetime -from glob import glob -import signal -import traceback -import asyncio -import concurrent.futures - -import iceprod -from iceprod.core.functions import gethostname -from iceprod.core import constants -from iceprod.core.resources import Resources -import iceprod.core.logger - -logger = logging.getLogger('pilot') - -try: - import psutil -except ImportError: - psutil = None - -try: - from setproctitle import setproctitle -except ImportError: - def setproctitle(name): - pass - - -class Pilot: - """ - A pilot task runner. - - The pilot allows multiple tasks to run in sequence or parallel. - It keeps track of resource usage, killing anything that goes over - requested amounts. - - Use as an async context manager:: - - async with Pilot(*args) as p: - await p.run() - - Args: - config (dict): the configuration dictionary - runner (callable): the task/config runner - pilot_id (str): the pilot id - rpc (:py:class:`iceprod.core.exe_json.ServerComms`): RPC to server - debug (bool): debug mode (default False) - run_timeout (int): how often to check if a task is running - backoff_delay (int): what constant delay to use for backoff - backoff_factor (int): what starting delay to use for exponential backoff - download_delay (int): min delay between each task download attempt - resource_interval (float): seconds between resouce measurements - restrict_site (bool): restrict running tasks to explicitly requiring this site - """ - def __init__(self, config, runner, pilot_id, rpc=None, debug=False, - run_timeout=180, backoff_delay=60, backoff_factor=1, - download_delay=60, resource_interval=1.0, restrict_site=False): - self.config = config - self.runner = runner - self.pilot_id = pilot_id - self.hostname = gethostname() - self.rpc = rpc - self.debug = debug - self.run_timeout = run_timeout - self.backoff_delay = backoff_delay - self.backoff_factor = backoff_factor - self.download_delay = download_delay - self.resource_interval = resource_interval - self.query_params = {} - self.last_download = None - - self.running = True - self.tasks = {} - - try: - setproctitle('iceprod2_pilot({})'.format(pilot_id)) - except Exception: - pass - - logger.warning('pilot_id: %s', self.pilot_id) - logger.warning('hostname: %s', self.hostname) - - # hint at resources for pilot - # don't pass them as raw, because that overrides condor - if 'resources' in config['options']: - for k in config['options']['resources']: - v = config['options']['resources'][k] - name = 'NUM_'+k.upper() - if k in ('cpu','gpu'): - name += 'S' - os.environ[name] = str(v) - self.resources = Resources(debug=self.debug) - if restrict_site: - if not self.resources.site: - logger.error('cannot restrict site, as the site is unknown') - else: - self.query_params['requirements.site'] = self.resources.site - - self.start_time = time.time() - - async def __aenter__(self): - # update pilot status - await self.rpc.update_pilot( - self.pilot_id, - tasks=[], - host=self.hostname, - version=iceprod.__version__, - site=self.resources.site, - start_date=datetime.utcnow().isoformat(), - resources_available=self.resources.get_available(), - resources_claimed=self.resources.get_claimed() - ) - - loop = asyncio.get_event_loop() - # 
set up resource monitor - if psutil: - loop.create_task(self.resource_monitor()) - else: - logger.warning('no psutil. not checking resource usage') - - # set up signal handler - def handler(signum, frame): - logger.critical('termination signal received') - self.running = False - self.term_handler() - self.prev_signal = signal.signal(signal.SIGTERM, handler) - - return self - - async def __aexit__(self, exc_type, exc, tb): - try: - # make sure any child processes are dead - self.hard_kill() - - if self.debug: - # append out, err, log - for dirs in glob('tmp*'): - for filename in (constants['stdout'], constants['stderr'], - constants['stdlog']): - if os.path.exists(os.path.join(dirs,filename)): - with open(filename,'a') as f: - print('', file=f) - print('----',dirs,'----', file=f) - with open(os.path.join(dirs,filename)) as f2: - print(f2.read(), file=f) - - await self.rpc.delete_pilot(self.pilot_id) - except Exception: - logger.error('error in aexit', exc_info=True) - - # restore previous signal handler - signal.signal(signal.SIGTERM, self.prev_signal) - - def term_handler(self): - """Handle a SIGTERM gracefully""" - logger.info('checking resources after SIGTERM') - overages = self.resources.check_claims() - for task_id in list(self.tasks): - task = self.tasks[task_id] - try: - if task_id in overages: - reason = overages[task_id] - else: - reason = 'pilot SIGTERM' - - # clean up task - used_resources = self.resources.get_final(task_id) - self.clean_task(task_id) - message = reason - message += '\n\npilot SIGTERM\npilot_id: {}'.format(self.pilot_id) - message += '\nhostname: {}'.format(self.hostname) - kwargs = { - 'resources': used_resources, - 'reason': reason, - 'message': message, - } - if 'dataset_id' in task['config']['options']: - kwargs['dataset_id'] = task['config']['options']['dataset_id'] - self.rpc.task_kill_sync(task_id, **kwargs) - except Exception: - pass - - # stop the pilot - try: - self.rpc.delete_pilot_sync(self.pilot_id) - except Exception: - pass - sys.exit(1) - - def hard_kill(self): - """Forcefully kill any child processes""" - if psutil: - # kill children correctly - processes = psutil.Process().children(recursive=True) - processes.reverse() - for p in processes: - try: - p.kill() - except psutil.NoSuchProcess: - pass - except Exception: - logger.warning('error killing process', - exc_info=True) - for task in self.tasks.values(): - try: - task['p'].kill() - except ProcessLookupError: - logger.warning('error killing process', - exc_info=True) - - async def resource_monitor(self): - """Monitor the tasks, killing any that go over resource limits""" - try: - sleep_time = self.resource_interval # check every X seconds - while self.running or self.tasks: - logger.debug('pilot monitor - checking resource usage') - start_time = time.time() - - try: - overages = self.resources.check_claims() - for task_id in overages: - used_resources = self.resources.get_peak(task_id) - logger.warning('kill %r for going over resources: %r', - task_id, used_resources) - message = overages[task_id] - message += '\n\npilot_id: {}'.format(self.pilot_id) - message += '\nhostname: {}'.format(self.hostname) - kwargs = { - 'resources': used_resources, - 'reason': overages[task_id], - 'message': message, - } - if 'dataset_id' in self.tasks[task_id]['config']['options']: - kwargs['dataset_id'] = self.tasks[task_id]['config']['options']['dataset_id'] - - self.clean_task(task_id) - await self.rpc.task_kill(task_id, **kwargs) - except Exception: - logger.error('error in resource_monitor', exc_info=True) - 
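The interval bookkeeping just below keeps the checks roughly periodic by
subtracting how long each check took before sleeping. The same pattern in
isolation (names illustrative)::

    import asyncio
    import time

    async def monitor(check, interval=1.0):
        # run check() about once per `interval`, compensating for
        # however long the check itself takes
        while True:
            start = time.time()
            check()
            elapsed = time.time() - start
            if elapsed < interval:
                await asyncio.sleep(interval - elapsed)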
- duration = time.time()-start_time - logger.debug('sleep_time %.2f, duration %.2f',sleep_time,duration) - if duration < sleep_time: - await asyncio.sleep(sleep_time-duration) - except Exception: - logger.error('pilot monitor died', exc_info=True) - logger.warning('pilot monitor exiting') - - async def run(self): - """Run the pilot""" - download_errors = max_download_errors = 5 - iceprod_errors = 10 - task_errors = max_task_errors = int(10**math.log10(10+self.resources.total['cpu'])) - logger.info('max_errors: %d, %d', max_download_errors, max_task_errors) - tasks_running = 0 - - async def backoff(): - """Backoff for rate limiting""" - delay = self.backoff_delay+self.backoff_factor*(1+random.random()) - logger.info('backoff %d', delay) - await asyncio.sleep(delay) - self.backoff_factor *= 2 - while self.running or self.tasks: - while self.running: - # retrieve new task(s) - if self.last_download and time.time()-self.last_download < self.download_delay: - logger.warning('last download attempt too recent, backing off') - await asyncio.sleep(time.time()-self.last_download+self.download_delay) - break - self.last_download = time.time() - # if self.resources.total['gpu'] and not self.resources.available['gpu']: - # logger.info('gpu pilot with no gpus left - not queueing') - # break - try: - task_configs = await self.rpc.download_task( - self.config['options']['gridspec'], - resources=self.resources.get_available(), - site=self.resources.site, - query_params=self.query_params - ) - except Exception: - download_errors -= 1 - if download_errors < 1: - self.running = False - logger.warning('errors over limit, draining') - logger.error('cannot download task. current error count is %d', - max_download_errors-download_errors, exc_info=True) - await backoff() - continue - logger.info('task configs: %r', task_configs) - - if not task_configs: - logger.info('no task available') - if not self.tasks: - self.running = False - logger.warning('no task available, draining') - break - else: - # start up new task(s) - for task_config in task_configs: - try: - task_id = task_config['options']['task_id'] - except Exception: - iceprod_errors -= 1 - if iceprod_errors < 1: - self.running = False - logger.warning('errors over limit, draining') - logger.error('error getting task_id from config') - break - try: - if 'resources' not in task_config['options']: - task_config['options']['resources'] = None - task_resources = self.resources.claim(task_id, task_config['options']['resources']) - task_config['options']['resources'] = task_resources - except Exception: - iceprod_errors -= 1 - if iceprod_errors < 1: - self.running = False - logger.warning('errors over limit, draining') - logger.warning('error claiming resources %s', task_id, - exc_info=True) - message = 'pilot_id: {}\nhostname: {}\n\n'.format(self.pilot_id, self.hostname) - message += traceback.format_exc() - kwargs = { - 'reason': 'failed to claim resources', - 'message': message, - } - if 'dataset_id' in task_config['options']: - kwargs['dataset_id'] = task_config['options']['dataset_id'] - await self.rpc.task_kill(task_id, **kwargs) - break - try: - f = self.create_task(task_config) - task = await f.__anext__() - task['iter'] = f - self.tasks[task_id] = task - except Exception: - iceprod_errors -= 1 - if iceprod_errors < 1: - self.running = False - logger.warning('errors over limit, draining') - logger.warning('error creating task %s', task_id, - exc_info=True) - message = 'pilot_id: {}\nhostname: {}\n\n'.format(self.pilot_id, self.hostname) - message += 
traceback.format_exc() - kwargs = { - 'reason': 'failed to create task', - 'message': message, - } - if 'dataset_id' in task_config['options']: - kwargs['dataset_id'] = task_config['options']['dataset_id'] - await self.rpc.task_kill(task_id, **kwargs) - self.clean_task(task_id) - break - - # update pilot status - await self.rpc.update_pilot(self.pilot_id, tasks=list(self.tasks), - resources_available=self.resources.get_available(), - resources_claimed=self.resources.get_claimed()) - - if (self.resources.available['cpu'] < 1 - or self.resources.available['memory'] < 1 - or (self.resources.total['gpu'] and not self.resources.available['gpu'])): - logger.info('no resources left, so wait for tasks to finish') - break - # otherwise, backoff - await backoff() - - # wait until we can queue more tasks - while self.running or self.tasks: - logger.info('wait while tasks are running. timeout=%r',self.run_timeout) - start_time = time.time() - while self.tasks and time.time()-self.run_timeout < start_time: - done,pending = await asyncio.wait([asyncio.create_task(task['p'].wait()) for task in self.tasks.values()], - timeout=self.resource_interval, - return_when=concurrent.futures.FIRST_COMPLETED) - if done: - break - - tasks_running = len(self.tasks) - for task_id in list(self.tasks): - # check if any processes have died - proc = self.tasks[task_id]['p'] - clean = False - if proc.returncode is not None: - f = self.tasks[task_id]['iter'] - try: - task = await f.__anext__() - except StopAsyncIteration: - logger.warning('task %s finished', task_id) - except Exception: - logger.warning('task %s failed', task_id, - exc_info=True) - task_errors -= 1 - else: - logger.warning('task %s yielded again', task_id) - task['iter'] = f - self.tasks[task_id] = task - continue - - # if we got here, the task is done - clean = True - - # make sure the task is not running anymore - try: - await self.rpc.still_running(task_id) - except Exception: - pass - else: - logger.warning('task %s is still running, so killing it', task_id) - kwargs = { - 'reason': 'task exited with return code {}'.format(proc.returncode), - 'message': 'task exited with return code {}'.format(proc.returncode), - 'resources': self.resources.get_final(task_id), - } - if 'dataset_id' in self.tasks[task_id]['config']['options']: - kwargs['dataset_id'] = self.tasks[task_id]['config']['options']['dataset_id'] - await self.rpc.task_kill(task_id, **kwargs) - else: - # check if the DB has killed a task - try: - await self.rpc.still_running(task_id) - except Exception: - logger.warning('task %s killed by db', task_id) - kwargs = { - 'reason': 'server kill', - 'message': 'The server has marked the task as no longer running', - } - if 'dataset_id' in self.tasks[task_id]['config']['options']: - kwargs['dataset_id'] = self.tasks[task_id]['config']['options']['dataset_id'] - await self.rpc.task_kill(task_id, **kwargs) - clean = True - if clean: - self.clean_task(task_id) - if task_errors < 1: - self.running = False - logger.warning('errors over limit, draining') - - # update pilot status - if (not self.tasks) or len(self.tasks) < tasks_running: - logger.info('%d tasks removed', tasks_running-len(self.tasks)) - tasks_running = len(self.tasks) - await self.rpc.update_pilot(self.pilot_id, tasks=list(self.tasks), - resources_available=self.resources.get_available(), - resources_claimed=self.resources.get_claimed()) - if self.running: - break - elif (self.running and self.resources.available['cpu'] > 1 - and self.resources.available['memory'] > 1 - and 
(self.resources.available['gpu'] or not self.resources.total['gpu'])): - logger.info('resources available, so request a task') - break - - # last update for pilot state - await self.rpc.update_pilot(self.pilot_id, tasks=[], - resources_available=self.resources.get_available(), - resources_claimed=self.resources.get_claimed()) - - if task_errors < 1: - logger.critical('too many errors when running tasks') - raise RuntimeError('too many errors') - else: - logger.warning('cleanly stopping pilot') - - async def create_task(self, config): - """ - Create a new Task and start running it - - Args: - config (dict): The task config - """ - task_id = config['options']['task_id'] - - # add grid-specific config - for k in self.config['options']: - if k == 'resources': - pass - elif k not in config['options']: - config['options'][k] = self.config['options'][k] - - tmpdir = tempfile.mkdtemp(suffix='.{}'.format(task_id), dir=os.getcwd()) - config['options']['subprocess_dir'] = tmpdir - - # start the task - # r = config['options']['resources'] - async for proc in self.runner(config, resources=self.resources): - ps = psutil.Process(proc.pid) if psutil else None - self.resources.register_process(task_id, ps, tmpdir) - data = { - 'p': proc, - 'process': ps, - 'tmpdir': tmpdir, - 'config': config, - } - yield data - - def clean_task(self, task_id): - """Clean up a Task. - - Delete remaining processes and the task temp dir. Release resources - back to the pilot. - - Args: - task_id (str): the task_id - """ - logger.info('cleaning task %s', task_id) - if task_id in self.tasks: - task = self.tasks[task_id] - del self.tasks[task_id] - - # kill process if still running - try: - if psutil: - # kill children correctly - try: - processes = task['process'].children(recursive=True) - except psutil.NoSuchProcess: - pass # process already died - else: - processes.reverse() - processes.append(task['process']) - for p in processes: - try: - p.terminate() - except psutil.NoSuchProcess: - pass - except Exception: - logger.warning('error terminating process', - exc_info=True) - - def on_terminate(proc): - logger.info("process %r terminated with exit code %r", - proc, proc.returncode) - try: - gone, alive = psutil.wait_procs(processes, timeout=0.1, - callback=on_terminate) - for p in alive: - try: - p.kill() - except psutil.NoSuchProcess: - pass - except Exception: - logger.warning('error killing process', - exc_info=True) - except Exception: - logger.warning('failed to kill processes', - exc_info=True) - task['p'].kill() - except ProcessLookupError: - pass # process already died - except Exception: - logger.warning('error deleting process', exc_info=True) - - # copy stdout/stderr - try: - os.rename(os.path.join(task['tmpdir'],constants['stderr']), constants['stderr']) - os.rename(os.path.join(task['tmpdir'],constants['stdout']), constants['stdout']) - except Exception: - logger.warning('error copying std[out,err] files', exc_info=True) - - # clean tmpdir - try: - if not self.debug: - shutil.rmtree(task['tmpdir']) - except Exception: - logger.warning('error deleting tmpdir', exc_info=True) - - # return resources to pilot - try: - self.resources.release(task_id) - except Exception: - logger.warning('error releasing resources', exc_info=True) diff --git a/iceprod/core/serialization.py b/iceprod/core/serialization.py deleted file mode 100644 index fe463d1b..00000000 --- a/iceprod/core/serialization.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -Classes supporting dataset serialization. 
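The clean_task teardown above escalates in the usual psutil order: terminate
the children depth-first, wait briefly, then kill any stragglers. Condensed,
with an illustrative helper name::

    import psutil

    def stop_tree(proc, timeout=0.1):
        # terminate the whole process tree, children first
        procs = proc.children(recursive=True)
        procs.reverse()
        procs.append(proc)
        for p in procs:
            try:
                p.terminate()
            except psutil.NoSuchProcess:
                pass
        # give processes a moment to exit, then kill whatever remains
        gone, alive = psutil.wait_procs(procs, timeout=timeout)
        for p in alive:
            try:
                p.kill()
            except psutil.NoSuchProcess:
                pass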
- -The interface is as follows:: - - class Serialization: - def dump(config, filename, **kwargs): - return None - def dumps(config, **kwargs): - return string - def load(filename, **kwargs): - return config - def loads(string, **kwargs): - return config - -The kwargs for each function are optional keyword arguments to pass -to the underlying serialization library. Each function is a static method -and can be called like:: - - Serialization.dump(config,filename) -""" - -import logging -import json as _json - -from iceprod.core import dataclasses - -logger = logging.getLogger('serialization') - - -class SerializationError(Exception): - """An exception that occurs during serialization.""" - def __init__(self, value=''): - self.value = value - - def __str__(self): - return self.__repr__() - - def __repr__(self): - if self.value: - return 'SerializationError(%r)'%(self.value) - else: - return 'SerializationError()' - - def __reduce__(self): - return (SerializationError,(self.value,)) - - -def dict_to_dataclasses(input_dict): - """ - Convert a dictionary to dataclasses objects. - - :param input_dict: input dictionary - :returns: :class:`iceprod.core.dataclasses.Job` - """ - ret = dataclasses.Job(input_dict) - ret.convert() - return ret - - -class serialize_json(object): - """ - Serialize a dataset config to json. - """ - @staticmethod - def dump(obj, filename, **kwargs): - return _json.dump(obj, open(filename,'w'), **kwargs) - - @staticmethod - def dumps(obj, **kwargs): - return _json.dumps(obj, **kwargs) - - @staticmethod - def load(filename, **kwargs): - return dict_to_dataclasses(_json.load(open(filename), **kwargs)) - - @staticmethod - def loads(obj, **kwargs): - return dict_to_dataclasses(_json.loads(obj, **kwargs)) diff --git a/tests/core/config_test.py b/tests/core/config_test.py index 47208f21..94bfe75e 100644 --- a/tests/core/config_test.py +++ b/tests/core/config_test.py @@ -1,3 +1,4 @@ +import logging import pytest from rest_tools.client import RestClient @@ -9,11 +10,17 @@ def test_dataset_dataclasses(): with pytest.raises(Exception): Dataset() - d = Dataset('did123', 123, 'grp', 'usr', {}) + d = Dataset('did123', 123, 1, 2, 3, 'processing', 0.5, 'grp', 'usr', False, {}) assert d.dataset_id == 'did123' assert d.dataset_num == 123 + assert d.jobs_submitted == 1 + assert d.tasks_submitted == 2 + assert d.tasks_per_job == 3 + assert d.status == 'processing' + assert d.priority == 0.5 assert d.group == 'grp' assert d.user == 'usr' + assert d.debug is False assert d.config == {} @@ -22,8 +29,14 @@ async def test_load_config(requests_mock): dataset_data = { 'dataset': 123, 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, 'group': 'g123', 'username': 'u123', + 'debug': False } requests_mock.get(f'http://test.iceprod/datasets/{dataset_id}', json=dataset_data) config_data = { @@ -46,26 +59,119 @@ async def test_defaults(): dataset_data = { 'dataset': 123, 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, 'group': 'g123', 'username': 'u123', + 'debug': False } config_data = {} - d = Dataset(dataset_data['dataset_id'], dataset_data['dataset'], dataset_data['group'], dataset_data['username'], config_data) + d = Dataset( + dataset_data['dataset_id'], + dataset_data['dataset'], + dataset_data['jobs_submitted'], + dataset_data['tasks_submitted'], + dataset_data['tasks_per_job'], + dataset_data['status'], + 
dataset_data['priority'], + dataset_data['group'], + dataset_data['username'], + dataset_data['debug'], + config_data + ) d.fill_defaults() + logging.info('after defaults: %r', d.config) assert d.config['version'] == 3.1 + assert d.config['options'] == {} + assert d.config['steering'] == {'parameters': {}, 'batchsys': {}, 'data': []} + +async def test_defaults_refs(): + dataset_data = { + 'dataset': 123, + 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, + 'group': 'g123', + 'username': 'u123', + 'debug': False + } + config_data = { + 'steering': { + 'parameters': {'a': 'b'} + }, + 'tasks': [{ + 'requirements': {}, + 'data': [{ + 'remote': 'http://test/file' + }], + 'trays': [{ + 'modules': [{}] + }] + }] + } + d = Dataset( + dataset_data['dataset_id'], + dataset_data['dataset'], + dataset_data['jobs_submitted'], + dataset_data['tasks_submitted'], + dataset_data['tasks_per_job'], + dataset_data['status'], + dataset_data['priority'], + dataset_data['group'], + dataset_data['username'], + dataset_data['debug'], + config_data + ) + d.fill_defaults() + logging.info('after defaults: %r', d.config) + assert d.config['tasks'][0]['requirements']['cpu'] == 1 + assert d.config['tasks'][0]['requirements']['memory'] == 1.0 + assert d.config['tasks'][0]['requirements']['disk'] == 1.0 + assert d.config['tasks'][0]['requirements']['time'] == 1.0 + assert d.config['tasks'][0]['data'][0]['local'] == '' + assert d.config['tasks'][0]['data'][0]['type'] == 'permanent' + assert d.config['tasks'][0]['data'][0]['movement'] == 'input' + assert d.config['tasks'][0]['data'][0]['transfer'] is True + assert d.config['tasks'][0]['trays'][0]['iterations'] == 1 + assert d.config['tasks'][0]['trays'][0]['modules'][0]['env_shell'] == '' + assert d.config['tasks'][0]['trays'][0]['modules'][0]['env_clear'] is True async def test_validate_error(): dataset_data = { 'dataset': 123, 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, 'group': 'g123', 'username': 'u123', + 'debug': False } config_data = { 'my': 'config' } - d = Dataset(dataset_data['dataset_id'], dataset_data['dataset'], dataset_data['group'], dataset_data['username'], config_data) + d = Dataset( + dataset_data['dataset_id'], + dataset_data['dataset'], + dataset_data['jobs_submitted'], + dataset_data['tasks_submitted'], + dataset_data['tasks_per_job'], + dataset_data['status'], + dataset_data['priority'], + dataset_data['group'], + dataset_data['username'], + dataset_data['debug'], + config_data + ) with pytest.raises(Exception): d.validate() @@ -74,8 +180,14 @@ async def test_validate_valid(): dataset_data = { 'dataset': 123, 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, 'group': 'g123', 'username': 'u123', + 'debug': False } config_data = { 'tasks': [{ @@ -85,7 +197,19 @@ async def test_validate_valid(): }] }] } - d = Dataset(dataset_data['dataset_id'], dataset_data['dataset'], dataset_data['group'], dataset_data['username'], config_data) + d = Dataset( + dataset_data['dataset_id'], + dataset_data['dataset'], + dataset_data['jobs_submitted'], + dataset_data['tasks_submitted'], + dataset_data['tasks_per_job'], + dataset_data['status'], + dataset_data['priority'], + dataset_data['group'], + dataset_data['username'], + dataset_data['debug'], + config_data + ) d.fill_defaults() d.validate() @@ -94,7 
+218,7 @@ def test_job_dataclasses(): with pytest.raises(Exception): Job() - d = Dataset('did123', 123, 'grp', 'usr', {}) + d = Dataset('did123', 123, 2, 1, 1, 'processing', 0.5, 'grp', 'usr', False, {}) j = Job(d, 'j123', 1, 'processing') assert j.dataset == d @@ -107,7 +231,7 @@ def test_task_dataclasses(): with pytest.raises(Exception): Task() - d = Dataset('did123', 123, 'grp', 'usr', {}) + d = Dataset('did123', 123, 2, 1, 1, 'processing', 0.5, 'grp', 'usr', False, {}) j = Job(d, 'j123', 1, 'processing') t = Task(d, j, 't123', 0, 'foo', [], {}, 'waiting', '', {}) @@ -124,7 +248,7 @@ def test_task_dataclasses(): def test_task_config(): - d = Dataset('did123', 123, 'grp', 'usr', {'tasks':[1,2,3]}) + d = Dataset('did123', 123, 2, 1, 1, 'processing', 0.5, 'grp', 'usr', False, {'tasks':[1,2,3]}) j = Job(d, 'j123', 1, 'processing') t = Task(d, j, 't123', 0, 'foo', [], {}, 'waiting', '', {}) @@ -135,9 +259,15 @@ async def test_task_load_from_api(requests_mock): dataset_id = 'did123' dataset_data = { 'dataset': 123, - 'dataset_id': dataset_id, + 'dataset_id': 'did123', + 'status': 'processing', + 'jobs_submitted': 1, + 'tasks_submitted': 1, + 'tasks_per_job': 1, + 'priority': 0.5, 'group': 'g123', 'username': 'u123', + 'debug': False } requests_mock.get(f'http://test.iceprod/datasets/{dataset_id}', json=dataset_data) config_data = { @@ -177,7 +307,7 @@ async def test_task_load_from_api(requests_mock): async def test_task_load_stats(requests_mock): - d = Dataset('did123', 123, 'grp', 'usr', {'tasks':[1,2,3]}) + d = Dataset('did123', 123, 2, 1, 1, 'processing', 0.5, 'grp', 'usr', False, {'tasks':[1,2,3]}) j = Job(d, 'j123', 1, 'processing') t = Task(d, j, 't123', 0, 'foo', [], {}, 'waiting', '', {}) diff --git a/tests/core/exe_test.py b/tests/core/exe_test.py index 730d20f5..edf0f9e8 100644 --- a/tests/core/exe_test.py +++ b/tests/core/exe_test.py @@ -2,2528 +2,530 @@ Test script for core exe """ -from __future__ import absolute_import, division, print_function +import logging +import pytest + +import iceprod.core.config +import iceprod.core.exe +from iceprod.core.exe import Data +from iceprod.core.defaults import add_default_options -from tests.util import unittest_reporter, glob_tests -import logging logger = logging.getLogger('exe_test') -import os -import sys -import time -import shutil -import tempfile -import random -import string -import subprocess -from functools import partial, reduce - -try: - import cPickle as pickle -except: - import pickle -import unittest -from unittest.mock import patch, MagicMock - -from tornado.testing import AsyncTestCase - -from iceprod.core import to_log,constants -import iceprod.core.dataclasses -import iceprod.core.functions -import iceprod.core.exe -from iceprod.core.jsonUtil import json_encode,json_decode - - -class DownloadTestCase(AsyncTestCase): - def setUp(self): - super(DownloadTestCase,self).setUp() - - self.test_dir = tempfile.mkdtemp(dir=os.getcwd()) - curdir = os.getcwd() - os.symlink(os.path.join(curdir, 'iceprod'), - os.path.join(self.test_dir, 'iceprod')) - os.chdir(self.test_dir) - def cleanup(): - os.chdir(curdir) - shutil.rmtree(self.test_dir) - self.addCleanup(cleanup) - - # clean up environment - base_env = dict(os.environ) - def reset_env(): - for k in set(os.environ).difference(base_env): - del os.environ[k] - for k in base_env: - os.environ[k] = base_env[k] - self.addCleanup(reset_env) - - def mk_files(self, path, data, compress=None, ext=False): - orig_path = path - if not ext: - path,ext = os.path.splitext(path) - while ext: - 
path,ext = os.path.splitext(path) - dirname = os.path.dirname(path) - if not os.path.exists(dirname): - os.mkdir(dirname) - if isinstance(data,dict): - # make directory of things - if not os.path.exists(path): - os.mkdir(path) - for k in data: - with open(os.path.join(path,k),'w' if isinstance(data[k],str) else 'wb') as f: - f.write(data[k]) - else: - with open(path,'w' if isinstance(data,str) else 'wb') as f: - f.write(data) - if compress: - new_path = iceprod.core.functions.compress(path,compress) - if new_path != orig_path: - os.rename(new_path, orig_path) - if orig_path != path: - iceprod.core.functions.removedirs(path) - - def make_shared_lib(self): - """Make a shared library file used for testing""" - so_file = os.path.join(self.test_dir,'hello')[len(os.getcwd()):] - if so_file[0] == '/': - so_file = so_file[1:] - if os.path.exists(so_file+'.so'): - with open(so_file+'.so','rb') as f: - return f.read() - - # find Python.h - from distutils import sysconfig - pythondir = sysconfig.get_python_inc() - logger.info('pythondir: %s', pythondir) - if os.path.exists(os.path.join(pythondir, 'Python.h')): - pythonheader = os.path.join(os.path.basename(pythondir), 'Python.h') - elif os.path.exists(os.path.join(os.path.dirname(pythondir), 'Python.h')): - pythonheader = 'Python.h' - else: - raise Exception('cannot find Python.h') - - with open(so_file+'.c','w') as f: - f.write('#include <'+pythonheader+"""> - -static PyObject* say_hello(PyObject* self, PyObject* args) -{ - const char* name; - - if (!PyArg_ParseTuple(args, "s", &name)) - return NULL; - - return Py_BuildValue("s", name); -} - -static PyMethodDef HelloMethods[] = -{ - {"say_hello", say_hello, METH_VARARGS, "Greet somebody."}, - {NULL, NULL, 0, NULL} -}; - -#if PY_MAJOR_VERSION >= 3 - static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - "hello", /* m_name */ - "Hello world", /* m_doc */ - -1, /* m_size */ - HelloMethods, /* m_methods */ - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL, /* m_free */ - }; - PyMODINIT_FUNC - PyInit_hello(void) - { - (void) PyModule_Create(&moduledef); + +def get_task(config): + d = iceprod.core.config.Dataset('did123', 123, 2, 1, 1, 'processing', 0.5, 'grp', 'usr', False, config) + d.fill_defaults() + add_default_options(d.config['options']) + j = iceprod.core.config.Job(d, 'j123', 1, 'processing') + t = iceprod.core.config.Task(d, j, 't123', 0, 'foo', [], {}, 'waiting', '', {}) + return t + + +def test_config_parser(): + t = get_task({ + 'steering': { + 'parameters': {'foo': 1, 'bar': [2, 3, 4]} + }, + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }] + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + assert c.parseValue('$steering(foo)') == 1 + assert c.parseObject({'foo': '$steering(bar)'}, {}) == {'foo': [2, 3, 4]} + assert c.parseObject('$(bar)', {'parameters': {'bar': {'a': 'b'}}}) == {'a': 'b'} + + +def test_scope_env(): + t = get_task({ + 'steering': { + 'parameters': {'foo': 1, 'bar': [2, 3, 4]} + }, + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [{ + 'movement': 'input', + 'remote': 'https://foo.bar/baz' + }], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + with iceprod.core.exe.scope_env(c, t.dataset.config['steering']) as env: + # test parameters + assert env['parameters'] == t.dataset.config['steering']['parameters'] + assert c.parseObject('$(foo)', env) == 1 + + # test parsing data + with iceprod.core.exe.scope_env(c, t.dataset.config['tasks'][0], 
env) as tenv: + assert tenv['input_files'] == {Data('https://foo.bar/baz', 'baz')} + assert env['input_files'] == {Data('https://foo.bar/baz', 'baz')} + + +def test_download_data(): + data = { + 'movement': 'input', + 'remote': 'https://foo.bar/baz' + } + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + ret = iceprod.core.exe.downloadData(data, c, logger=logger) + assert ret == Data('https://foo.bar/baz', 'baz') + + +def test_download_data_invalid(): + data = { + 'movement': 'input', + 'remote': '' + } + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + with pytest.raises(Exception): + iceprod.core.exe.downloadData(data, c, logger=logger) + + +def test_download_data_no_transfer(): + data = { + 'movement': 'input', + 'remote': 'https://foo.bar/baz', + 'transfer': 'no' + } + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + ret = iceprod.core.exe.downloadData(data, c, logger=logger) + assert not ret + + +def test_download_data_job_temp(): + data = { + 'movement': 'input', + 'local': 'baz', + 'type': 'job_temp' + } + t = get_task({ + 'options': { + 'job_temp': 'https://foo.bar', + }, + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + ret = iceprod.core.exe.downloadData(data, c, logger=logger) + assert ret == Data('https://foo.bar/baz', 'baz') + + +def test_upload_data(): + data = { + 'movement': 'output', + 'remote': 'https://foo.bar/baz' + } + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + ret = iceprod.core.exe.uploadData(data, c, logger=logger) + assert ret == Data('https://foo.bar/baz', 'baz') + + +def test_upload_data_invalid(): + data = { + 'movement': 'output', + 'remote': '' + } + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + with pytest.raises(Exception): + iceprod.core.exe.uploadData(data, c, logger=logger) + + +def test_upload_data_no_transfer(): + data = { + 'movement': 'output', + 'remote': 'https://foo.bar/baz', + 'transfer': 'no' } -#else - PyMODINIT_FUNC - inithello(void) - { - (void) Py_InitModule("hello", HelloMethods); + t = get_task({ + 'tasks': [{ + 'name': 'foo', + 'trays': [{ + 'modules': [{}] + }], + 'data': [data], + }] + }) + + c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger) + ret = iceprod.core.exe.uploadData(data, c, logger=logger) + assert not ret + + +def test_upload_data_job_temp(): + data = { + 'movement': 'output', + 'local': 'baz', + 'type': 'job_temp' } -#endif -""") - from distutils.ccompiler import new_compiler - c = new_compiler() - logger.info('pwd: %s',os.path.expandvars('$PWD')) - with to_log(stream=sys.stderr,level='warn'),to_log(stream=sys.stdout): - try: - ret = c.compile([so_file+'.c'],output_dir='.',include_dirs=[os.path.dirname(pythondir),pythondir]) - logger.info('ret1: %r',ret) - ret = c.link_shared_object([so_file+'.o'],so_file+'.so') - logger.info('ret2: %r',ret) - except: - ret = 
c.compile([so_file+'.c'],output_dir='.',include_dirs=[os.path.dirname(pythondir),pythondir], - extra_preargs=['-fPIC']) - logger.info('ret3: %r',ret) - ret = c.link_shared_object([so_file+'.o'],so_file+'.so') - logger.info('ret4: %r',ret) - - with open(so_file+'.so','rb') as f: - return f.read() - -class exe_test(DownloadTestCase): - def setUp(self): - super(exe_test,self).setUp() - - # set offline mode - self.config = iceprod.core.exe.Config() - self.config.config['options']['offline'] = True - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource') - async def test_001_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff' - r['local'] = 'localstuff' - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, 'the data') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - if r['local'] not in env['files']: - raise Exception('downloadResource did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - gz') - async def test_002_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff2.gz' - r['local'] = 'localstuff2.gz' - r['compression'] = True - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, 'the data', compress='gz') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - if r['local'] not in env['files']: - raise Exception('did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'][:-3])): - raise Exception('did not uncompress to the ' - 'expected filename of %s'%r['local'][:-3]) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - tar') - async def test_003_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff3.tar' - r['local'] = 'localstuff3.tar' - r['compression'] = True - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, {'f':'the data'}, compress='tar') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of 
file in env - if r['local'] not in env['files']: - raise Exception('did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-4])): - raise Exception('did not uncompress to the ' - 'expected filename of %s'%r['local'][:-4]) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - tar.bz2') - async def test_004_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff4.tar.bz2' - r['local'] = 'localstuff4.tar.bz2' - r['compression'] = True - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, {'f':'the data'}, compress='bz2') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - if r['local'] not in env['files']: - raise Exception('did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-8])): - raise Exception('did not uncompress to the ' - 'expected filename of %s'%r['local'][:-8]) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - tgz') - async def test_005_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff5.tgz' - r['local'] = 'localstuff5.tgz' - r['compression'] = True - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, {'f':'the data'}, compress='gz') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - if r['local'] not in env['files']: - raise Exception('did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-4])): - raise Exception('did not uncompress to the ' - 'expected filename of %s'%r['local'][:-4]) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - invalid env') - async def test_006_downloadResource(self, download): - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, 'localstuff5') - self.mk_files(path, 'the data') - return path - download.return_value = create - - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff5.tgz' - r['local'] = 'localstuff5.tgz' - r['compression'] = None - with self.assertRaises(Exception): - await iceprod.core.exe.downloadResource({},r) - - @patch('iceprod.core.exe.functions.download') - 
@unittest_reporter(name='downloadResource - maybe transfer') - async def test_007_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff' - r['local'] = 'localstuff' - r['transfer'] = 'maybe' - - # don't create the downloaded file - async def create(*args,**kwargs): - raise Exception('no file') - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - download.assert_called() - self.assertNotIn(r['local'], env['files']) - if os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource wrote to the ' - 'expected filename of %s'%r['local']) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadResource - no transfer') - async def test_008_downloadResource(self, download): - # create an environment - options = {'resource_url': 'http://blah/downloads', - 'resource_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Resource() - r['remote'] = 'stuff' - r['local'] = 'localstuff' - r['transfer'] = 0 - - # don't create the downloaded file - async def create(*args,**kwargs): - raise Exception('no file') - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadResource(env,r) - # check for record of file in env - download.assert_not_called() - if 'files' in env: - self.assertNotIn(r['local'], env['files']) - if os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource wrote to the ' - 'expected filename of %s'%r['local']) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadData') - async def test_010_downloadData(self, download): - # create an environment - options = {'data_url': 'http://blah/downloads', - 'data_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Data() - r['remote'] = 'stuff' - r['local'] = 'localstuff' - r['type'] = 'permanent' - r['movement'] = 'input' - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, 'the data') - return path - download.side_effect = create - - # try downloading the resource - await iceprod.core.exe.downloadData(env,r) - # check for record of file in env - if r['local'] not in env['files']: - raise Exception('downloadResource did not add the file ' - '%s to the env'%r['local']) - if not os.path.isfile(os.path.join(self.test_dir,r['local'])): - raise Exception('downloadResource did not write to the ' - 'expected filename of %s'%r['local']) - - @patch('iceprod.core.exe.functions.download') - @unittest_reporter(name='downloadData - gz') - async def test_011_downloadData(self, download): - # create an environment - options = {'data_url': 'http://blah/downloads', - 'data_directory': self.test_dir} - env = {'options':options} - - # create a resource object - r = iceprod.core.dataclasses.Data() - r['remote'] = 'stuff2.gz' - r['local'] = 'localstuff2.gz' - r['compression'] = True - r['type'] = 'permanent' - r['movement'] = 'input' - - # create the downloaded file - async def create(*args,**kwargs): - path = os.path.join(self.test_dir, r['local']) - self.mk_files(path, 'the 
data', compress='gz')
-            return path
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        if r['local'] not in env['files']:
-            raise Exception('did not add the file '
-                            '%s to the env'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('did not write to the '
-                            'expected filename of %s'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'][:-3])):
-            raise Exception('did not uncompress to the '
-                            'expected filename of %s'%r['local'][:-3])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - tar')
-    async def test_012_downloadData(self, download):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff3.tar'
-        r['local'] = 'localstuff3.tar'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(self.test_dir, r['local'])
-            self.mk_files(path, {'f':'the data'}, compress='tar')
-            return path
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        if r['local'] not in env['files']:
-            raise Exception('did not add the file '
-                            '%s to the env'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('did not write to the '
-                            'expected filename of %s'%r['local'])
-        if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-4])):
-            raise Exception('did not uncompress to the '
-                            'expected filename of %s'%r['local'][:-4])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - tar.bz2')
-    async def test_013_downloadData(self, download):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff4.tar.bz2'
-        r['local'] = 'localstuff4.tar.bz2'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(self.test_dir, r['local'])
-            self.mk_files(path, {'f':'the data'}, compress='bz2')
-            return path
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        if r['local'] not in env['files']:
-            raise Exception('did not add the file '
-                            '%s to the env'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('did not write to the '
-                            'expected filename of %s'%r['local'])
-        if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-8])):
-            raise Exception('did not uncompress to the '
-                            'expected filename of %s'%r['local'][:-8])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - tgz')
-    async def test_014_downloadData(self, download):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff4.tgz'
-        r['local'] = 'localstuff4.tgz'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(self.test_dir, r['local'])
-            self.mk_files(path, {'f':'the data'}, compress='gz')
-            return path
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        if r['local'] not in env['files']:
-            raise Exception('did not add the file '
-                            '%s to the env'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('did not write to the '
-                            'expected filename of %s'%r['local'])
-        if not os.path.isdir(os.path.join(self.test_dir,r['local'][:-4])):
-            raise Exception('did not uncompress to the '
-                            'expected filename of %s'%r['local'][:-4])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - invalid env')
-    async def test_015_downloadData(self, download):
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff4'
-        r['local'] = 'localstuff4'
-        r['compression'] = None
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-
-        # try supplying invalid env
-        with self.assertRaises(Exception):
-            await iceprod.core.exe.downloadData({},r)
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - maybe transfer')
-    async def test_016_downloadData(self, download):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-        r['transfer'] = 'maybe'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            raise Exception()
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        download.assert_called()
-        if 'files' in env:
-            self.assertNotIn(r['local'], env['files'])
-        if os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('downloadResource wrote to the '
-                            'expected filename of %s'%r['local'])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - no transfer')
-    async def test_017_downloadData(self, download):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-        r['transfer'] = False
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            raise Exception()
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        download.assert_not_called()
-        if 'files' in env:
-            self.assertNotIn(r['local'], env['files'])
-        if os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('downloadResource wrote to the '
-                            'expected filename of %s'%r['local'])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='downloadData - oauth creds')
-    async def test_018_downloadData(self, download):
-        # create an environment
-        token_file = os.path.join(self.test_dir, 'token')
-        with open(token_file, 'w') as f:
-            f.write('XXX')
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir,
-                   'credentials_dir': self.test_dir,
-                   'credentials': {'http://blah': 'token'}}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'http://blah//downloads/stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'input'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            assert kwargs['options']['token'] == 'XXX'
-            path = os.path.join(self.test_dir, r['local'])
-            self.mk_files(path, 'the data')
-            return path
-        download.side_effect = create
-
-        # try downloading the resource
-        await iceprod.core.exe.downloadData(env,r)
-        # check for record of file in env
-        if r['local'] not in env['files']:
-            raise Exception('downloadResource did not add the file '
-                            '%s to the env'%r['local'])
-        if not os.path.isfile(os.path.join(self.test_dir,r['local'])):
-            raise Exception('downloadResource did not write to the '
-                            'expected filename of %s'%r['local'])
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData')
-    async def test_020_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, 'the data')
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - gz')
-    async def test_021_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff2.gz'
-        r['local'] = 'localstuff2'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, 'the data')
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']+'.gz'))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - tar')
-    async def test_022_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff3.tar'
-        r['local'] = 'localstuff3'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, {'f':'the data'})
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']+'.tar'))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - tar.bz2')
-    async def test_023_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff4.tar.bz2'
-        r['local'] = 'localstuff4'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, {'f':'the data'})
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']+'.tar.bz2'))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - tgz')
-    async def test_024_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff5.tgz'
-        r['local'] = 'localstuff5'
-        r['compression'] = True
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, {'f':'the data'})
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']+'.tgz'))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - invalid env')
-    async def test_025_uploadData(self, upload):
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff5.tgz'
-        r['local'] = 'localstuff5'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        with self.assertRaises(Exception):
-            await iceprod.core.exe.uploadData({},r)
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - maybe transfer')
-    async def test_026_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-        r['transfer'] = 'maybe'
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # test that no file means no upload
-        await iceprod.core.exe.uploadData(env,r)
-        upload.assert_not_called()
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, 'the data')
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']))
-        self.assertEqual(upload.call_args[0][1],
-                         os.path.join(options['data_url'],r['remote']))
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - no transfer')
-    async def test_027_uploadData(self, upload):
-        # create an environment
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-        r['transfer'] = 'not'
-
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # test that no file means no upload
-        await iceprod.core.exe.uploadData(env,r)
-        upload.assert_not_called()
-
-    @patch('iceprod.core.exe.functions.upload')
-    @unittest_reporter(name='uploadData - oauth creds')
-    async def test_028_uploadData(self, upload):
-        # create an environment
-        token_file = os.path.join(self.test_dir, 'token')
-        with open(token_file, 'w') as f:
-            f.write('XXX')
-        options = {'data_url': 'http://blah/downloads',
-                   'data_directory': self.test_dir,
-                   'credentials_dir': self.test_dir,
-                   'credentials': {'http://blah': 'token'}}
-        env = {'options':options}
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'http://blah/downloads/stuff'
-        r['local'] = 'localstuff'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-
-        # create the downloaded file
-        path = os.path.join(self.test_dir, r['local'])
-        self.mk_files(path, 'the data')
-
-        async def up(*args,**kwargs):
-            assert kwargs['options']['token'] == 'XXX'
-        upload.side_effect = up
-
-        # try uploading the data
-        await iceprod.core.exe.uploadData(env,r)
-        self.assertTrue(upload.called)
-        self.assertEqual(upload.call_args[0][0],
-                         os.path.join(options['data_directory'],r['local']))
-        self.assertEqual(upload.call_args[0][1], r['remote'])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='setupClass')
-    async def test_030_setupClass(self, download):
-        # create an env
-        env = {'options':{'local_temp':os.path.join(self.test_dir,'classes')}}
-        os.mkdir(env['options']['local_temp'])
-
-        # create a class object
-        r = iceprod.core.dataclasses.Class()
-        r['name'] = 'datatransfer.py'
-        r['src'] = 'datatransfer.py'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(env['options']['local_temp'], r['name'])
-            self.mk_files(path, 'class GridFTP(): pass', ext=True)
-            return path
-        download.side_effect = create
-
-        # try setting up the class
-        await iceprod.core.exe.setupClass(env,r)
-
-        self.assertIn(r['name'], env['classes'])
-        self.assertIn(os.path.dirname(env['classes'][r['name']]),
-                      os.environ['PYTHONPATH'].split(':'))
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='setupClass - env $CLASS')
-    async def test_031_setupClass(self, download):
-        # create an env
-        env = {'options':{'local_temp':os.path.join(self.test_dir,'classes')}}
-        os.mkdir(env['options']['local_temp'])
-
-        # create a class object
-        r = iceprod.core.dataclasses.Class()
-        r['name'] = 'datatransfer.py'
-        r['src'] = 'datatransfer.py'
-        r['env_vars'] = 'I3_BUILD=$CLASS'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(env['options']['local_temp'], r['name'])
-            self.mk_files(path, 'class GridFTP(): pass', ext=True)
-            return path
-        download.side_effect = create
-
-        # try setting up the class
-        await iceprod.core.exe.setupClass(env,r)
-
-        self.assertIn(r['name'], env['classes'])
-        self.assertIn(os.path.dirname(env['classes'][r['name']]),
-                      os.environ['PYTHONPATH'].split(':'))
-        self.assertIn('I3_BUILD', os.environ)
-        self.assertEqual(os.environ['I3_BUILD'], env['classes'][r['name']])
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='setupClass - env overload')
-    async def test_032_setupClass(self, download):
-        # create an env
-        env = {'options':{'local_temp':os.path.join(self.test_dir,'classes')}}
-        os.mkdir(env['options']['local_temp'])
-
-        # create a class object
-        r = iceprod.core.dataclasses.Class()
-        r['name'] = 'datatransfer.py'
-        r['src'] = 'datatransfer.py'
-        r['env_vars'] = 'tester=1:2:3;PATH=$PWD;PYTHONPATH=$PWD/test'
-
-        # create the downloaded file
-        async def create(*args,**kwargs):
-            path = os.path.join(env['options']['local_temp'], r['name'])
-            self.mk_files(path, 'class GridFTP(): pass', ext=True)
-            return path
-        download.side_effect = create
-
-        # try setting up the class
-        await iceprod.core.exe.setupClass(env,r)
-
-        self.assertIn(r['name'], env['classes'])
-        self.assertIn(os.path.dirname(env['classes'][r['name']]),
-                      os.environ['PYTHONPATH'].split(':'))
-        self.assertIn('tester', os.environ)
-        self.assertEqual(os.environ['tester'], '1:2:3')
-        self.assertIn('PATH', os.environ)
-        self.assertIn('$PWD', os.environ['PATH'].split(':'))
-        self.assertIn('PYTHONPATH', os.environ)
-        self.assertIn('$PWD/test', os.environ['PYTHONPATH'].split(':'))
-
-    @unittest_reporter(name='setupenv - basic')
-    async def test_100_setupenv_basic(self):
-        """Test basic setupenv functionality"""
-        obj = iceprod.core.dataclasses.Steering()
-        # create an empty env
-        async with iceprod.core.exe.SetupEnv(self.config, obj) as empty_env:
-            # create secondary env
-            async with iceprod.core.exe.SetupEnv(self.config, obj, empty_env) as env2:
-                # create something in env2, and check it's not in empty_env
-                env2['test'] = 'testing'
-                self.assertNotIn('test', empty_env, 'env2 is a direct link to empty_env')
-
-                # make new env from env2, and check it has that value
-                async with iceprod.core.exe.SetupEnv(self.config, obj, env2) as env3:
-                    self.assertIn('test', env3, 'env3 does not have test value')
-                    self.assertEqual(env3['test'], 'testing', 'env3 does not have test value')
-
-                    # check that modifying a value in env3 has no effect on env2
-                    env3['test'] = 'abcd'
-                    self.assertEqual(env2['test'], 'testing', 'env3 is a direct link to env2')
-
-                    # check that modifying a value in env2 has no effect on env3
-                    env2['test'] = 'dcba'
-                    self.assertEqual(env3['test'], 'abcd', 'env2 is a direct link to env3')
-
-        # do second level checks, like dealing with parameters
-        obj.parameters = {}
-        async with iceprod.core.exe.SetupEnv(self.config, obj) as env4:
-            async with iceprod.core.exe.SetupEnv(self.config, obj, env4) as env5:
-                env5['parameters']['test'] = 1
-                self.assertNotIn('test', env4['parameters'],
-                                 'adding a parameter in env5 adds it to env4')
-                async with iceprod.core.exe.SetupEnv(self.config, obj, env5) as env6:
-                    env6['parameters']['test'] = 2
-                    self.assertNotEqual(env5['parameters']['test'], 2,
-                                        'modifying a parameter in env6 modifies it in env5')
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='setupenv - steering')
-    async def test_101_setupenv_steering(self, download):
-        """Test setupenv with steering object"""
-        # create the steering object
-        steering = iceprod.core.dataclasses.Steering()
-
-        # create a resource object
-        r = iceprod.core.dataclasses.Resource()
-        r['remote'] = 'globus.tar.gz'
-        r['local'] = 'globus.tar.gz'
-        steering['resources'].append(r)
-
-        # create some parameters
-        steering['parameters'] = {'test_param':'value'}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        # set testing resource directory
-        options['subprocess_dir'] = os.path.join(self.test_dir,'resources')
-
-        # set download() return value
-        async def create(*args,**kwargs):
-            path = os.path.join(options['subprocess_dir'],r['local'])
-            self.mk_files(path, {'f':'blah'}, compress='gz')
-            return path
-        download.side_effect = create
-
-        # create the env
-        async with iceprod.core.exe.SetupEnv(self.config, steering,
-                                             {'options':options}) as env:
-
-            # test parameters
-            for p in steering['parameters']:
-                if p not in env['parameters']:
-                    raise Exception('Parameters were not applied ' +
-                                    'correctly: missing %r'%p)
-
-            # test options
-            for p in options:
-                if p not in env['options']:
-                    raise Exception('Options were not applied ' +
-                                    'correctly: missing %r'%p)
-
-            # test resource
-            if r['local'] not in env['files']:
-                raise Exception('downloadResource did not add the file ' +
-                                '%s to the env'%r['local'])
-            if (env['files'][r['local']] !=
-                os.path.join(self.test_dir,'resources',r['local'])):
-                raise Exception('downloadResource did not return the ' +
-                                'expected filename of %s' %
-                                os.path.join(self.test_dir,'resources',
-                                             r['local']))
-            if not os.path.isfile(env['files'][r['local']]):
-                raise Exception('downloadResource did not write to the ' +
-                                'expected filename of %s' %
-                                env['files'][r['local']])
-
-    @patch('iceprod.core.exe.functions.upload')
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='destroyenv - steering')
-    async def test_102_destroyenv_steering(self, download, upload):
-        """Test destroyenv with steering object"""
-        # create the steering object
-        steering = iceprod.core.dataclasses.Steering()
-
-        # create a data object
-        r = iceprod.core.dataclasses.Data()
-        r['remote'] = 'globus.tar.gz'
-        r['local'] = 'globus.tar.gz'
-        r['type'] = 'permanent'
-        r['movement'] = 'both'
-        steering['data'].append(r)
-
-        # create parameters
-        steering['parameters'] = {'test_param':'value'}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        # set testing data directory
-        options['subprocess_dir'] = os.path.join(self.test_dir,'data')
-
-        # set download() return value
-        async def create(*args,**kwargs):
-            path = os.path.join(options['subprocess_dir'],r['local'])
-            self.mk_files(path, {'f':'blah'}, compress='gz')
-            return path
-        download.side_effect = create
-        async def up(*args,**kwargs):
-            pass
-        upload.side_effect = up
-
-        # try a file deletion
-        filename = os.path.join(self.test_dir,'test_file')
-        with open(filename,'w') as f:
-            f.write('this is a test')
-
-        # create the env
-        async with iceprod.core.exe.SetupEnv(self.config, steering, {'options':options}) as env:
-            env['deletions'] = [filename]
-
-        if os.path.exists(filename):
-            raise Exception('failed to delete file')
-
-        # try environment reset
-
-        # create the env
-        async with iceprod.core.exe.SetupEnv(self.config, steering,
-                                             {'options':options,
-                                              'deletions':[filename]}) as env:
-            os.environ['MyTestVar'] = 'testing'
-
-        if 'MyTestVar' in os.environ:
-            raise Exception('failed to delete environment entry')
-
-        if os.path.exists(filename):
-            raise Exception('failed to delete file')
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - iceprod module (from src)')
-    async def test_200_runmodule_iceprod_src(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['running_class'] = 'Test'
-
-        # create parameters
-        module['parameters'] = {'greeting': 'new greeting'}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = True
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            logger.info('create file %s', path)
-            self.mk_files(path, """
-class IPBaseClass:
-    def __init__(self):
-        self.params = {}
-    def AddParameter(self,p,h,d):
-        self.params[p] = d
-    def GetParameter(self,p):
-        return self.params[p]
-    def SetParameter(self,p,v):
-        self.params[p] = v
-class Test(IPBaseClass):
-    def __init__(self):
-        IPBaseClass.__init__(self)
-    def Execute(self,stats):
-        return 0
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                logger.info('\n%s', open(constants['stderr']).read())
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - iceprod module (clear env)')
-    async def test_201_runmodule_iceprod_env(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['running_class'] = 'Test'
-        module['env_clear'] = True
-
-        # create parameters
-        module['parameters'] = {'greeting': 'new greeting'}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-class IPBaseClass:
-    def __init__(self):
-        self.params = {}
-    def AddParameter(self,p,h,d):
-        self.params[p] = d
-    def GetParameter(self,p):
-        return self.params[p]
-    def SetParameter(self,p,v):
-        self.params[p] = v
-class Test(IPBaseClass):
-    def __init__(self):
-        IPBaseClass.__init__(self)
-    def Execute(self,stats):
-        return 0
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - simple module from src')
-    async def test_210_runmodule_simple(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['running_class'] = 'Test'
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-def Test():
-    return 'Tester'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-        # try with short form of class
-        module['running_class'] = 'Test'
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed (short)')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - python script')
-    async def test_211_runmodule_script(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://x2100.icecube.wisc.edu/downloads'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp.icecube.wisc.edu/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://code.icecube.wisc.edu/svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-def Test():
-    return 'Tester'
-if __name__ == '__main__':
-    Test()
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - shell script')
-    async def test_212_runmodule_script(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.sh'
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-uname -a
-echo "test"
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - config file')
-    async def test_213_runmodule_configs(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.sh'
-        module['configs'] = {'foo': {'bar': 123}}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-uname -a
-echo "test"
-cat foo|grep 123
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-        # now for failure
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-uname -a
-echo "test"
-cat foobar|grep 123
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                pass
-            else:
-                logger.error('running the module succeeded when not supposed to')
-                raise Exception('running the module succeeded when not supposed to')
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - args string')
-    async def test_214_runmodule_args(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['args'] = '-a b --c d e'
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-import argparse
-p = argparse.ArgumentParser()
-p.add_argument('-a')
-p.add_argument('--c')
-p.add_argument('e', nargs='+')
-args = p.parse_args()
-assert args.a == 'b'
-assert args.c == 'd'
-assert args.e == ['e']
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - args json')
-    async def test_215_runmodule_args(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['args'] = '{"kwargs": {"a": "b", "cc": "d"}, "args": ["e"]}'
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-import argparse
-p = argparse.ArgumentParser()
-p.add_argument('-a')
-p.add_argument('--cc')
-p.add_argument('e', nargs='+')
-args = p.parse_args()
-assert args.a == 'b'
-assert args.cc == 'd'
-assert args.e == ['e']
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - args dict')
-    async def test_216_runmodule_args(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['args'] = {"kwargs": {"a": "b", "cc": "d"}, "args": ["e"]}
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-import argparse
-p = argparse.ArgumentParser()
-p.add_argument('-a')
-p.add_argument('--cc')
-p.add_argument('e', nargs='+')
-args = p.parse_args()
-assert args.a == 'b'
-assert args.cc == 'd'
-assert args.e == ['e']
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - python script (clear env)')
-    async def test_220_runmodule_script(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['env_clear'] = True
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-def Test():
-    return 'Tester'
-if __name__ == '__main__':
-    Test()
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - python script (env_shell)')
-    async def test_221_runmodule_script(self, download):
-        # create env_shell
-        env_shell = os.path.join(self.test_dir,'env_shell.sh')
-        with open(env_shell,'w') as f:
-            f.write('#!/bin/sh\nfoo=bar $@\n')
-        os.chmod(env_shell, 0o777)
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['src'] = 'file:/test.py'
-        module['env_shell'] = env_shell
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(*args, **kwargs):
-            path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-            self.mk_files(path, """
-import os
-def Test():
-    if os.environ['foo'] != 'bar':
-        raise Exception('bad env_shell')
-if __name__ == '__main__':
-    Test()
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runmodule - with linked libraries')
-    async def test_230_runmodule_icetray(self, download):
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            path = os.path.join(options['local_temp'], c['src'])
-            self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the module
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runmodule(self.config, env, module):
-                    await mod.wait()
-            except:
-                logger.error('running the module failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtray')
-    async def test_300_runtray(self, download):
-        """Test runtray"""
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test2.py'
-        tray['modules'].append(module)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            if url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test():
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtray(self.config, env, tray):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtray - iterations')
-    async def test_310_runtray_iter(self, download):
-        """Test runtray iterations"""
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-        tray['iterations'] = 3
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        module['args'] = ['$(iter)']
-        tray['modules'].append(module)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            if url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test(*args,**kwargs):
-    print(args,kwargs)
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options}
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtray(self.config, env, tray):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtask')
-    async def test_400_runtask(self, download):
-        # create the task object
-        task = iceprod.core.dataclasses.Task()
-        task['name'] = 'task'
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            if url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test():
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options,'stats':{'tasks':[]}}
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtask(self.config, env, task):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtask - task_files')
-    async def test_401_runtask(self, download):
-        # create the task object
-        task = iceprod.core.dataclasses.Task()
-        task['name'] = 'task'
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://foo/'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-        if 'subprocess_dir' not in options:
-            options['subprocess_dir'] = os.path.join(self.test_dir,'subprocess_dir')
-
-        async def create(url, *args, **kwargs):
-            logger.info('create: %s', url)
-            if url.endswith('foobar'):
-                path = os.path.join(options['subprocess_dir'], 'foobar')
-                self.mk_files(path, """foobar""", ext=True)
-            elif url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test():
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options,'stats':{'tasks':[]}}
-
-        # enable files api
-        task['task_files'] = True
-        self.config.config['options']['dataset_id'] = 'd'
-        self.config.config['options']['task_id'] = 't'
-        self.config.config['options']['offline_transfer'] = True
-        self.config.rpc = MagicMock()
-        async def files(dataset_id, task_id):
-            d = iceprod.core.dataclasses.Data()
-            d['remote'] = 'http://test/foobar'
-            d['movement'] = 'input'
-            return [d]
-        self.config.rpc.task_files.side_effect = files
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtask(self.config, env, task):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-        self.config.rpc.task_files.assert_called_once()
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtask - multiple trays')
-    async def test_410_runtask_multi(self, download):
-        # create the task object
-        task = iceprod.core.dataclasses.Task()
-        task['name'] = 'task'
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray2'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://x2100.icecube.wisc.edu/downloads'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp.icecube.wisc.edu/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://code.icecube.wisc.edu/svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            if url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test():
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options,'stats':{'tasks':[]}}
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtask(self.config, env, task):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-
-    @patch('iceprod.core.exe.functions.download')
-    @unittest_reporter(name='runtask - multiple trays with iterations')
-    async def test_420_runtask_multi_iter(self, download):
-        """Test runtask with multiple trays and iterations"""
-        # create the task object
-        task = iceprod.core.dataclasses.Task()
-        task['name'] = 'task'
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray'
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # create the tray object
-        tray = iceprod.core.dataclasses.Tray()
-        tray['name'] = 'tray2'
-        tray['iterations'] = 3
-
-        # create the module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module'
-        module['running_class'] = 'test.Test'
-        module['env_clear'] = False
-
-        c = iceprod.core.dataclasses.Class()
-        c['name'] = 'test'
-        c['src'] = 'test.tar.gz'
-        module['classes'].append(c)
-        tray['modules'].append(module)
-
-        # create another module object
-        module = iceprod.core.dataclasses.Module()
-        module['name'] = 'module2'
-        module['running_class'] = 'Test'
-        module['src'] = 'file:/test.py'
-        tray['modules'].append(module)
-
-        # add tray to task
-        task['trays'].append(tray)
-
-        # make .so file
-        so = self.make_shared_lib()
-
-        # check that validate, resource_url, debug are in options
-        options = {}
-        if 'validate' not in options:
-            options['validate'] = True
-        if 'resource_url' not in options:
-            options['resource_url'] = 'http://x2100.icecube.wisc.edu/downloads'
-        if 'debug' not in options:
-            options['debug'] = False
-
-        # make sure some basic options are set
-        if 'data_url' not in options:
-            options['data_url'] = 'gsiftp://gridftp.icecube.wisc.edu/'
-        if 'svn_repository' not in options:
-            options['svn_repository'] = 'http://code.icecube.wisc.edu/svn/'
-        if 'job_temp' not in options:
-            options['job_temp'] = os.path.join(self.test_dir,'job_temp')
-        if 'local_temp' not in options:
-            options['local_temp'] = os.path.join(self.test_dir,'local_temp')
-
-        async def create(url, *args, **kwargs):
-            if url.endswith(c['src']):
-                path = os.path.join(options['local_temp'], c['src'])
-                self.mk_files(path, {'test.py':"""
-import hello
-def Test():
-    return hello.say_hello('Tester')
-""", 'hello.so':so}, compress='gz')
-            else:
-                path = os.path.join(options['local_temp'], os.path.basename(module['src']))
-                self.mk_files(path, """
-def Test():
-    return 'Tester2'
-""", ext=True)
-            return path
-        download.side_effect = create
-
-        # set env
-        env = {'options': options,'stats':{'tasks':[]}}
-
-        # run the tray
-        with to_log(sys.stdout,'stdout'),to_log(sys.stderr,'stderr'):
-            try:
-                async for mod in iceprod.core.exe.runtask(self.config, env, task):
-                    await mod.wait()
-            except:
-                logger.error('running the tray failed')
-                raise
-
-
-def load_tests(loader, tests, pattern):
-    suite = unittest.TestSuite()
-    alltests = glob_tests(loader.getTestCaseNames(exe_test))
-    suite.addTests(loader.loadTestsFromNames(alltests,exe_test))
-    return suite
+    t = get_task({
+        'options': {
+            'job_temp': 'https://foo.bar',
+        },
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{}]
+            }],
+            'data': [data],
+        }]
+    })
+
+    c = iceprod.core.exe.ConfigParser(t.dataset, logger=logger)
+    ret = iceprod.core.exe.uploadData(data, c, logger=logger)
+    assert ret == Data('https://foo.bar/baz', 'baz')
+
+
+async def test_write_to_script_no_module(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{}]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    with pytest.raises(iceprod.core.exe.ConfigError):
+        await ws.convert()
+    assert not ws.infiles
+    assert not ws.outfiles
+
+
+async def test_write_to_script_module_src(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo.py'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['python foo.py']
+
+
+async def test_write_to_script_module_shell(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo.sh'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['/bin/sh foo.sh']
+
+
+async def test_write_to_script_module_binary(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['./foo']
+
+async def test_write_to_script_module_binary_fullpath(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': '/cvmfs/foo'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['/cvmfs/foo']
+
+
+async def test_write_to_script_module_class(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'running_class': 'foo'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['python -m foo']
+
+
+async def test_write_to_script_tray_iter(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'iterations': 3,
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo.py',
+                    'args': '$(iter)'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    logging.debug('script: \n%s', script)
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == [
+        'python foo.py 0',
+        'python foo.py 1',
+        'python foo.py 2',
+    ]
+
+
+async def test_write_to_script_module_env_clear(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': True,
+                    'src': 'foo.py'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert len(lines) == 1
+    assert lines[0].startswith('env -i ')
+    assert lines[0].endswith(' python foo.py')
+
+
+async def test_write_to_script_module_env_shell(tmp_path):
+    t = get_task({
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'env_shell': '/foo/bar/baz.sh',
+                    'src': 'foo.py'
+                }]
+            }],
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert not ws.infiles
+    assert not ws.outfiles
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['/foo/bar/baz.sh python foo.py']
+
+
+async def test_write_to_script_data(tmp_path):
+    t = get_task({
+        'options': {
+            'job_temp': 'https://foo.bar',
+        },
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo.py'
+                }]
+            }],
+            'data': [{
+                'movement': 'input',
+                'type': 'permanent',
+                'remote': 'https://foo.bar/baz',
+            }, {
+                'movement': 'output',
+                'type': 'job_temp',
+                'local': '1234',
+            }]
+        }]
+    })
+
+    ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger)
+    scriptpath = await ws.convert()
+
+    assert ws.infiles == {Data('https://foo.bar/baz', 'baz')}
+    assert ws.outfiles == {Data('https://foo.bar/1234', '1234')}
+    script = open(scriptpath).read()
+    lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))]
+    assert lines == ['python foo.py']
+
+
+async def test_write_to_script_data_dups(tmp_path):
+    t = get_task({
+        'options': {
+            'job_temp': 'https://foo.bar',
+        },
+        'tasks': [{
+            'name': 'foo',
+            'trays': [{
+                'modules': [{
+                    'env_clear': False,
+                    'src': 'foo.py',
+                    'data': [{
+                        'movement': 'input',
+                        'type': 'permanent',
+                        'remote': 'https://foo.bar/baz',
+                    }, {
+                        'movement': 'output',
+                        'type': 'job_temp',
+                        'local': '1234',
+                    }]
+                }],
+                'data': [{
+                    'movement':
'input', + 'type': 'permanent', + 'remote': 'https://foo.bar/baz', + }] + }], + 'data': [{ + 'movement': 'input', + 'type': 'permanent', + 'remote': 'https://foo.bar/baz', + }] + }] + }) + + ws = iceprod.core.exe.WriteToScript(t, workdir=tmp_path, logger=logger) + scriptpath = await ws.convert() + + assert ws.infiles == {Data('https://foo.bar/baz', 'baz')} + assert ws.outfiles == {Data('https://foo.bar/1234', '1234')} + script = open(scriptpath).read() + lines = [line for line in script.split('\n') if not (not line.strip() or line.startswith('#') or line.startswith('set '))] + assert lines == ['python foo.py']
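
Editor's note, not part of the patch: every WriteToScript test above repeats the
same list-comprehension to reduce the generated shell script to its command
lines. A minimal sketch of that idiom factored into a helper (the name
`script_lines` is hypothetical, chosen here for illustration); it keeps only
lines that are non-blank, not comments, and not `set ` shell options, which is
exactly the condition the tests apply:

    # Hypothetical helper equivalent to the repeated filter in the tests.
    # De Morgan's law turns the original `not (not strip or ...)` into the
    # positive form used here; behavior is identical.
    def script_lines(scriptpath):
        with open(scriptpath) as f:
            script = f.read()
        return [
            line for line in script.split('\n')
            if line.strip()
            and not line.startswith('#')
            and not line.startswith('set ')
        ]

A test body could then shrink to `assert script_lines(scriptpath) == ['python foo.py']`,
which would also close the file handle that the inline `open(scriptpath).read()` leaks.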