diff --git a/deckard/base/files/files.py b/deckard/base/files/files.py
index ba871710..c4db3278 100644
--- a/deckard/base/files/files.py
+++ b/deckard/base/files/files.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Dict
 from copy import deepcopy
+from omegaconf import ListConfig
 
 from ..utils import my_hash
 
@@ -72,7 +73,8 @@ def __init__(
             else None
         )
         self.name = name if name else None
-        self.stage = stage if stage else None
+        stage = stage if stage else None
+        self.stage = stage[-1] if isinstance(stage, (list, ListConfig)) else stage
         self.files = files if files else {}
         logger.debug(f"FileConfig init: {self.files}")
 
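A note on the `FileConfig` change above: `stage` can now arrive as a plain string, a Python list, or an OmegaConf `ListConfig` (DVC hands the stage in as a one-element list), and only the last entry is kept. A standalone sketch of the coercion, with illustrative names:

```python
from omegaconf import ListConfig


def coerce_stage(stage):
    # Condensed from the FileConfig.__init__ hunk above: keep only the
    # last stage when a list-like value is passed in.
    stage = stage if stage else None
    return stage[-1] if isinstance(stage, (list, ListConfig)) else stage


print(coerce_stage(["attack"]))             # attack
print(coerce_stage(ListConfig(["train"])))  # train
print(coerce_stage("train"))                # train
print(coerce_stage(None))                   # None
```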
diff --git a/deckard/base/model/model.py b/deckard/base/model/model.py
index d335ef60..4e992cc9 100644
--- a/deckard/base/model/model.py
+++ b/deckard/base/model/model.py
@@ -133,7 +133,7 @@ def __call__(self, data: list, model: object, library=None):
             start = process_time_ns()
             start_timestamp = time()
             model.fit(data[0], data[2], **trainer)
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         except np.AxisError:  # pragma: no cover
             from art.utils import to_categorical
@@ -142,7 +142,7 @@ def __call__(self, data: list, model: object, library=None):
             start = process_time_ns()
             start_timestamp = time()
             model.fit(data[0], data[2], **trainer)
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         except ValueError as e:  # pragma: no cover
             if "Shape of labels" in str(e):
@@ -153,7 +153,7 @@ def __call__(self, data: list, model: object, library=None):
                 start = process_time_ns()
                 start_timestamp = time()
                 model.fit(data[0], data[2], **trainer)
-                end = process_time_ns() - start
+                end = process_time_ns()
                 end_timestamp = time()
             else:
                 raise e
@@ -165,7 +165,7 @@ def __call__(self, data: list, model: object, library=None):
                 start = process_time_ns()
                 start_timestamp = time()
                 model.fit(data[0], data[2], **trainer)
-                end = process_time_ns() - start
+                end = process_time_ns()
                 end_timestamp = time()
             except Exception as e:
                 raise e
@@ -177,7 +177,7 @@ def __call__(self, data: list, model: object, library=None):
                 start = process_time_ns()
                 start_timestamp = time()
                 model.fit(data[0], data[2], **trainer)
-                end = process_time_ns() - start
+                end = process_time_ns()
                 end_timestamp = time()
             elif "should be the same" in str(e).lower():
                 import torch
@@ -197,7 +197,7 @@ def __call__(self, data: list, model: object, library=None):
                 start = process_time_ns()
                 start_timestamp = time()
                 model.fit(data[0], data[2], **trainer)
-                end = process_time_ns() - start
+                end = process_time_ns()
                 end_timestamp = time()
             else:
                 raise e
@@ -564,7 +564,7 @@ def predict(self, data=None, model=None, predictions_file=None):
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         except NotFittedError as e:  # pragma: no cover
             logger.warning(e)
@@ -582,7 +582,7 @@ def predict(self, data=None, model=None, predictions_file=None):
         except Exception as e:  # pragma: no cover
             logger.error(e)
             raise e
-        end = process_time_ns() - start
+        end = process_time_ns()
         end_timestamp = time()
         if predictions_file is not None:
             self.data.save(predictions, predictions_file)
@@ -630,13 +630,13 @@ def predict_proba(self, data=None, model=None, probabilities_file=None):
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict_proba(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         else:
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         if probabilities_file is not None:
             self.data.save(predictions, probabilities_file)
@@ -683,19 +683,19 @@ def predict_log_loss(self, data, model, losses_file=None):
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict_log_proba(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         elif hasattr(model, "predict_proba"):
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict_proba(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         elif hasattr(model, "predict"):
             start = process_time_ns()
             start_timestamp = time()
             predictions = model.predict(data[1])
-            end = process_time_ns() - start
+            end = process_time_ns()
             end_timestamp = time()
         else:  # pragma: no cover
             raise ValueError(
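The repeated `model.py` change stores the raw `process_time_ns()` counter in `end` instead of a pre-computed difference; the duration is presumably derived as `end - start` downstream, symmetric with the wall-clock pair. A minimal sketch of the corrected pattern (the workload and the normalization are stand-ins, not the deckard API):

```python
from time import process_time_ns, time

start = process_time_ns()  # CPU-time counter, in nanoseconds
start_timestamp = time()   # wall-clock timestamp, in seconds
_ = sum(i * i for i in range(1_000_000))  # stand-in for model.fit(...)
end = process_time_ns()    # raw counter again; NOT end - start
end_timestamp = time()

cpu_seconds = (end - start) / 1e9
wall_seconds = end_timestamp - start_timestamp
print(f"cpu={cpu_seconds:.4f}s wall={wall_seconds:.4f}s")
```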
diff --git a/deckard/layers/experiment.py b/deckard/layers/experiment.py
index 14d9ba43..26e30f62 100644
--- a/deckard/layers/experiment.py
+++ b/deckard/layers/experiment.py
@@ -3,11 +3,11 @@
 import dvc.api
 from hydra.utils import instantiate
-from dulwich.errors import NotGitRepository
 import yaml
 import argparse
 from copy import deepcopy
 
 from ..base.utils import unflatten_dict
+from .utils import save_params_file
 
 logger = logging.getLogger(__name__)
 
@@ -27,8 +27,9 @@ def get_dvc_stage_params(
     name=None,
 ):
     logger.info(
-        f"Getting params for stage {stage} from {params_file} and {pipeline_file} in {directory}.",
+        f"Getting params for stage {stage} from {params_file} and {pipeline_file} in {Path(directory).resolve().as_posix()}.",
     )
+    stage = [stage] if not isinstance(stage, list) else stage
     params = dvc.api.params_show(stages=stage)
     params.update({"_target_": "deckard.base.experiment.Experiment"})
     files = dvc.api.params_show(pipeline_file, stages=stage, repo=directory)
@@ -69,14 +70,9 @@ def run_stage(
 
 
 def get_stages(pipeline_file="dvc.yaml", stages=None, repo=None):
-    try:
-        def_stages = list(
-            dvc.api.params_show(pipeline_file, repo=repo)["stages"].keys(),
-        )
-    except NotGitRepository:
-        raise ValueError(
-            f"Directory {repo} is not a git repository. Please run `dvc init` in {repo} and try again.",
-        )
+    with Path(repo, pipeline_file).open("r") as f:
+        pipeline = yaml.safe_load(f)["stages"]
+    def_stages = list(pipeline.keys())
     if stages is None or stages == []:
         raise ValueError(f"Please specify one or more stage(s) from {def_stages}")
     elif isinstance(stages, str):
@@ -111,23 +107,30 @@ def run_stages(stages, pipeline_file="dvc.yaml", params_file="params.yaml", repo
     dvc_parser.add_argument("--verbosity", type=str, default="INFO")
     dvc_parser.add_argument("--params_file", type=str, default="params.yaml")
     dvc_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml")
-    dvc_parser.add_argument("--config_dir", type=str, default="conf")
+    dvc_parser.add_argument("--config_dir", type=str, default=None)
     dvc_parser.add_argument("--config_file", type=str, default="default")
-    dvc_parser.add_argument("--workdir", type=str, default=".")
+    dvc_parser.add_argument("--dvc_repository", type=str, default=None)
     args = dvc_parser.parse_args()
-    config_dir = Path(args.workdir, args.config_dir).resolve().as_posix()
-    # save_params_file(
-    #     config_dir=config_dir,
-    #     config_file=args.config_file,
-    #     params_file=args.params_file,
-    # )
+    if args.config_dir is not None:
+        args.config_dir = Path(args.config_dir).resolve().as_posix()
+        assert args.config_file is not None, "Please specify a config file."
+    if (
+        args.config_dir is not None
+        and Path(args.config_dir, args.config_file).is_file()
+    ):
+        save_params_file(
+            config_dir=args.config_dir,
+            config_file=args.config_file,
+            params_file=args.params_file,
+        )
     logging.basicConfig(
         level=args.verbosity,
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
+    dvc_repository = args.dvc_repository if args.dvc_repository is not None else "."
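With `--workdir` replaced by `--dvc_repository`, the entry point now defers repository resolution to `run_stages`. A hypothetical programmatic call, assuming a `dvc.yaml` and `params.yaml` in the current directory (mirrors `python -m deckard.layers.experiment train` with `--dvc_repository` left unset):

```python
from deckard.layers.experiment import run_stages

# Equivalent of the CLI defaults: --dvc_repository unset falls back to ".".
results = run_stages(
    stages=["train"],          # one or more stage names from dvc.yaml
    pipeline_file="dvc.yaml",
    params_file="params.yaml",
    repo=".",
)
```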
     results = run_stages(
         stages=args.stage,
         pipeline_file=args.pipeline_file,
         params_file=args.params_file,
-        repo=args.workdir,
+        repo=dvc_repository,
     )
diff --git a/deckard/layers/utils.py b/deckard/layers/utils.py
index 18db9b01..7eedee56 100644
--- a/deckard/layers/utils.py
+++ b/deckard/layers/utils.py
@@ -3,13 +3,12 @@
 from hydra.errors import OverrideParseException
 from omegaconf import OmegaConf
-from copy import deepcopy
 import yaml
 from hydra import initialize_config_dir, compose
 from numpy import nan
 
-from ..base.utils import my_hash
+from ..base.utils import my_hash, flatten_dict
 
 logger = logging.getLogger(__name__)
 
@@ -58,63 +57,30 @@ def find_conf_files(
     return files
 
 
-def get_overrides(file: str, key: str = None, overrides=None):
-    if overrides is None:
-        overrides = {}
-    else:
-        if isinstance(overrides, str):
-            overrides = overrides.split(",")
-        if isinstance(overrides, list):
-            overrides = {
-                entry.split("=")[0]: entry.split("=")[1] for entry in overrides
-            }
-        if isinstance(overrides, dict):
-            new_dict = deepcopy(overrides)
-            for k, v in new_dict.items():
-                if k.startswith("++"):
-                    overrides[k] = v
-                elif k.startswith("+"):
-                    overrides[f"++{k[1:]}"] = v
-                elif k.startswith("~~"):
-                    overrides[f"~~{k[2:]}"] = v
-                else:
-                    overrides[f"++{k}"] = v
-
-    # assert isinstance(overrides, dict), f"Expected list, got {type(overrides)}"
-    # if key is not None and len(overrides) > 0:
-    #     overrides.pop(f"{key}.name", None)
-    #     overrides.pop(f"files.{key}_file", None)
-    #     overrides[f"++{key}.name"] = Path(file).stem
-    #     overrides[f"++files.{key}_file"] = Path(file).stem
-    #     overrides[f"{key}"] = Path(file).stem
-    #     overrides["++stage"] = key
+def get_overrides(file: str, folder, overrides=None):
+    with open(Path(folder, file), "r") as f:
+        old_cfg = yaml.safe_load(f)
+    old_cfg = OmegaConf.create(old_cfg)
+    old_cfg = OmegaConf.to_container(old_cfg, resolve=True)
+    flat_cfg = flatten_dict(old_cfg)
+    overrides = [] if overrides is None else overrides
+    if isinstance(overrides, str):
+        overrides = overrides.split(",")
+    assert isinstance(overrides, list), f"Expected list, got {type(overrides)}"
+    new_overrides = []
+    for override in overrides:
+        k, v = override.split("=")
+        if k in flat_cfg:
+            k = f"++{k}"
+        elif not k.startswith("+"):
+            k = f"+{k}"
+        new_overrides.append(f"{k}={v}")
+    overrides = new_overrides
     return overrides
 
 
 def compose_experiment(file, config_dir, overrides=None, default_file="default.yaml"):
-    if hasattr(file, "as_posix"):
-        file = file.as_posix()
-    if overrides in [None, "", "None", "none", "NONE", "null", "Null", "NULL"]:
-        overrides = []
-    elif isinstance(overrides, str):
-        overrides = overrides.split(",")
-    if isinstance(overrides, list):
-        pass
-    elif isinstance(overrides, dict):
-        new_dict = deepcopy(overrides)
-        for k, v in new_dict.items():
-            if k.startswith("++"):
-                overrides[k] = v
-            elif k.startswith("+"):
-                overrides[f"++{k[1:]}"] = v
-            elif k.startswith("--"):
-                overrides[f"++{k[2:]}"] = v
-            else:
-                overrides[f"++{k}"] = v
-    else:
-        raise TypeError(f"Expected list or dict, got {type(overrides)}")
-    assert isinstance(file, str), f"Expected str, got {type(file)}"
-    # file = Path(data_conf_dir, file).as_posix()
+    overrides = get_overrides(file=file, folder=config_dir, overrides=overrides)
     logger.info(f"Running experiment in config_dir: {config_dir}")
     logger.info(f"Running experiment with config_name: {file}")
     config_dir = Path(Path(), config_dir).resolve().as_posix()
@@ -122,14 +88,15 @@ def compose_experiment(file, config_dir, overrides=None, default_file="default.yaml"):
     with initialize_config_dir(config_dir=config_dir, version_base="1.3"):
         try:
             cfg = compose(config_name=Path(default_file).stem, overrides=overrides)
-        except OverrideParseException:
+        except OverrideParseException:  # pragma: no cover
            raise ValueError(f"Failed to parse overrides: {overrides}")
     cfg = OmegaConf.to_container(cfg, resolve=True)
     cfg["_target_"] = "deckard.Experiment"
     id_ = str(my_hash(cfg))
     cfg["name"] = id_
     cfg["files"]["name"] = id_
-    return cfg
+    cfg = OmegaConf.create(cfg)
+    return cfg
 
 
 def save_params_file(
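The rewritten `get_overrides` decides the Hydra prefix by checking the flattened config: keys that already exist are forced with `++`, unknown keys are appended with `+`, and keys the caller already prefixed pass through unchanged. A self-contained sketch of that rule (illustrative names, no deckard imports):

```python
def normalize_overrides(overrides, flat_cfg):
    # flat_cfg stands in for flatten_dict(yaml.safe_load(...)) in the hunk above.
    new_overrides = []
    for override in overrides:
        k, v = override.split("=")
        if k in flat_cfg:
            k = f"++{k}"   # existing key: force-override
        elif not k.startswith("+"):
            k = f"+{k}"    # unknown key: append to the config
        new_overrides.append(f"{k}={v}")
    return new_overrides


flat_cfg = {"data.sample.random_state": 0}  # pretend flattened evasion.yaml
print(normalize_overrides(["data.sample.random_state=420", "stage=train"], flat_cfg))
# ['++data.sample.random_state=420', '+stage=train']
```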
diff --git a/params.yaml b/params.yaml
new file mode 100644
index 00000000..7c9febf0
--- /dev/null
+++ b/params.yaml
@@ -0,0 +1,122 @@
+_target_: deckard.base.experiment.Experiment
+attack:
+  attack_size: 10
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    batch_size: 64
+    init_eval: 10
+    max_eval: 10
+    max_iter: 10
+    model:
+      art:
+        library: sklearn
+        pipeline:
+          initialize:
+            _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+      data:
+        generate:
+          n_features: 20
+          n_samples: 2000
+          name: classification
+          random_state: 0
+        sample:
+          random_state: 0
+          stratify: true
+      init:
+        C: 1.0
+        kernel: rbf
+        name: sklearn.svm.SVC
+        probability: true
+      library: sklearn
+    name: art.attacks.evasion.HopSkipJump
+  method: evasion
+  model:
+    art:
+      library: sklearn
+      pipeline:
+        initialize:
+          _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+    data:
+      generate:
+        n_features: 20
+        n_samples: 2000
+        name: classification
+        random_state: 0
+      sample:
+        random_state: 0
+        stratify: true
+    init:
+      C: 1.0
+      kernel: rbf
+      name: sklearn.svm.SVC
+      probability: true
+    library: sklearn
+data:
+  generate:
+    n_features: 20
+    n_samples: 2000
+    name: classification
+    random_state: 0
+  sample:
+    random_state: 0
+    stratify: true
+files:
+  _target_: deckard.base.files.FileConfig
+  adv_losses_file: adv_losses.json
+  adv_predictions_file: adv_predictions.json
+  adv_probabilities_file: adv_probabilities.json
+  attack_dir: attack
+  attack_file: attack.pkl
+  data_dir: data
+  data_file: data.pkl
+  directory: ???
+  losses_file: losses.json
+  model_dir: model
+  model_file: model.pkl
+  name: evasion
+  predictions_file: predictions.json
+  probabilities_file: probabilities.json
+  report_dir: report
+  score_dict_file: score_dict.json
+model:
+  art:
+    library: sklearn
+    pipeline:
+      initialize:
+        _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    C: 1.0
+    kernel: rbf
+    name: sklearn.svm.SVC
+    probability: true
+  library: sklearn
+optimizers:
+- accuracy
+- train_time
+scorers:
+  _target_: deckard.base.scorer.ScorerDict
+  accuracy:
+    direction: maximize
+    name: sklearn.metrics.accuracy_score
+    normalize: true
+  log_loss:
+    direction: minimize
+    name: sklearn.metrics.log_loss
+    normalize: true
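`directory: ???` in the generated `params.yaml` is OmegaConf's mandatory-value marker: the key exists, but reading it raises until a concrete value is supplied (the tests fill it with a temporary directory). A small demonstration:

```python
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"files": {"directory": "???"}})
try:
    _ = cfg.files.directory  # raises: the value is still missing
except MissingMandatoryValue:
    print("files.directory must be set before use")

cfg.files.directory = "output"
print(cfg.files.directory)  # output
```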
diff --git a/test/conf/experiment/.dvc/.gitignore b/test/conf/experiment/.dvc/.gitignore
new file mode 100644
index 00000000..528f30c7
--- /dev/null
+++ b/test/conf/experiment/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/test/conf/experiment/.dvc/config b/test/conf/experiment/.dvc/config
new file mode 100644
index 00000000..e69de29b
diff --git a/test/conf/experiment/.dvcignore b/test/conf/experiment/.dvcignore
new file mode 100644
index 00000000..51973055
--- /dev/null
+++ b/test/conf/experiment/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/test/conf/experiment/evasion.yaml b/test/conf/experiment/evasion.yaml
index 7463b531..c466b49c 100644
--- a/test/conf/experiment/evasion.yaml
+++ b/test/conf/experiment/evasion.yaml
@@ -1,71 +1,126 @@
-data:
+_target_: deckard.base.experiment.Experiment
+attack:
+  attack_size: 10
+  data:
     generate:
-      name: classification
-      random_state : 0
-      n_samples : 2000
-      n_features : 20
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
     sample:
-      # _target_: deckard.base.data.sampler.SklearnDataSampler
-      random_state : 0
-      stratify: True
-      train_size : 100
-      test_size : 1000
-model:
-  data: ${data}
-  library : sklearn
-  init:
-    name : sklearn.svm.SVC
+      random_state: 0
+      stratify: true
+  init:
+    batch_size: 64
+    init_eval: 10
+    max_eval: 10
+    max_iter: 10
+    model:
+      art:
+        library: sklearn
+        pipeline:
+          initialize:
+            _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+      data:
+        generate:
+          n_features: 20
+          n_samples: 2000
+          name: classification
+          random_state: 0
+        sample:
+          random_state: 0
+          stratify: true
+      init:
+        C: 1.0
         kernel: rbf
-    C : 1.0
+        name: sklearn.svm.SVC
         probability: true
+      library: sklearn
+    name: art.attacks.evasion.HopSkipJump
+  method: evasion
+  model:
     art:
-      library : sklearn
-      pipeline:
-        initialize:
-          _target_ : deckard.base.model.art_pipeline.ArtPipelineStage
-attack:
-  data : ${data}
-  model : ${model}
+      library: sklearn
+      pipeline:
+        initialize:
+          _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+    data:
+      generate:
+        n_features: 20
+        n_samples: 2000
+        name: classification
+        random_state: 0
+      sample:
+        random_state: 0
+        stratify: true
    init:
-      name: art.attacks.evasion.HopSkipJump
-      batch_size : 64
-      max_iter : 10
-      max_eval : 10
-      init_eval : 10
-      model: ${model}
-  attack_size : 10
-  method : evasion
+      C: 1.0
+      kernel: rbf
+      name: sklearn.svm.SVC
+      probability: true
+    library: sklearn
+data:
+  generate:
+    n_features: 20
+    n_samples: 2000
+    name: classification
+    random_state: 0
+  sample:
+    random_state: 0
+    stratify: true
 files:
-  _target_: deckard.base.files.FileConfig
-  data_dir : data
-  model_dir : model
-  attack_dir : attack
-  report_dir : report
-  directory : ???
-  data_file : data.pkl
-  model_file : model.pkl
-  attack_file : attack.pkl
-  score_dict_file : score_dict.json
-  predictions_file : predictions.json
-  probabilities_file : probabilities.json
-  losses_file : losses.json
-  adv_predictions_file : adv_predictions.json
-  adv_probabilities_file : adv_probabilities.json
-  adv_losses_file : adv_losses.json
-  name : evasion
+  _target_: deckard.base.files.FileConfig
+  adv_losses_file: adv_losses.json
+  adv_predictions_file: adv_predictions.json
+  adv_probabilities_file: adv_probabilities.json
+  attack_dir: attack
+  attack_file: attack
+  attack_type: .pkl
+  data_dir: data
+  data_file: data
+  data_type: .pkl
+  directory: output
+  losses_file: losses.json
+  model_dir: model
+  model_file: model
+  model_type: .pkl
+  name: evasion
+  predictions_file: predictions.json
+  probabilities_file: probabilities.json
+  reports: report
+  stage: '???'
+  score_dict_file: score_dict.json
+model:
+  art:
+    library: sklearn
+    pipeline:
+      initialize:
+        _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    C: 1.0
+    kernel: rbf
+    name: sklearn.svm.SVC
+    probability: true
+  library: sklearn
+optimizers:
+- accuracy
+- train_time
 scorers:
-  _target_: deckard.base.scorer.ScorerDict
-  accuracy:
-    name: sklearn.metrics.accuracy_score
-    direction: maximize
-    normalize: True
-  log_loss:
-    name: sklearn.metrics.log_loss
-    direction: minimize
-    normalize: True
-_target_: deckard.base.experiment.Experiment
-optimizers:
-  - accuracy
-  - train_time
-
+  _target_: deckard.base.scorer.ScorerDict
+  accuracy:
+    direction: maximize
+    name: sklearn.metrics.accuracy_score
+    normalize: true
+  log_loss:
+    direction: minimize
+    name: sklearn.metrics.log_loss
+    normalize: true
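The rewritten `evasion.yaml` above replaces the old `${data}` / `${model}` interpolations with fully resolved copies of those blocks. The resolution itself is plain OmegaConf behavior:

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "data": {"generate": {"n_samples": 2000}},
        "model": {"data": "${data}"},  # how the old evasion.yaml referred to data
    }
)
resolved = OmegaConf.to_container(cfg, resolve=True)
print(resolved["model"]["data"])  # {'generate': {'n_samples': 2000}}
```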
diff --git a/test/conf/experiment/params.yaml b/test/conf/experiment/params.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/test/layers/test_utils.py b/test/layers/test_utils.py
new file mode 100644
index 00000000..0abe636a
--- /dev/null
+++ b/test/layers/test_utils.py
@@ -0,0 +1,147 @@
+import unittest
+from pathlib import Path
+from tempfile import mkdtemp
+from shutil import rmtree
+import os
+from hydra import initialize_config_dir, compose
+from hydra.utils import instantiate
+from deckard.layers.utils import (
+    find_conf_files,
+    get_overrides,
+    compose_experiment,
+    save_params_file,
+)
+
+
+this_dir = Path(os.path.realpath(__file__)).parent.resolve().as_posix()
+
+
+class testFindConfFiles(unittest.TestCase):
+    config_dir = Path(this_dir, "../conf/experiment").resolve().as_posix()
+    config_file = "evasion.yaml"
+
+    def setUp(self):
+        with initialize_config_dir(
+            config_dir=Path(self.config_dir).resolve().as_posix(),
+            version_base="1.3",
+        ):
+            cfg = compose(config_name=self.config_file)
+        self.cfg = cfg
+        self.dir = mkdtemp()
+        self.cfg["files"]["directory"] = self.dir
+        self.exp = instantiate(config=self.cfg)
+
+    def test_find_conf_files_from_name(self):
+        files = find_conf_files(
+            config_name=self.config_file,
+            config_subdir="experiment",
+            config_dir=Path(self.config_dir).parent,
+        )
+        self.assertEqual(Path(files[0]).name, self.config_file)
+        self.assertEqual(Path(files[0]).parent.name, Path(self.config_dir).name)
+
+    def test_find_conf_files_from_regex(self):
+        files = find_conf_files(
+            config_regex="*.yaml",
+            config_subdir="experiment",
+            config_dir=Path(self.config_dir).parent,
+        )
+        self.assertEqual(Path(files[0]).name, self.config_file)
+        self.assertEqual(Path(files[0]).parent.name, Path(self.config_dir).name)
+
+    def test_find_conf_files_from_default(self):
+        files = find_conf_files(
+            default_file=Path(self.config_dir, "experiment", self.config_file),
+            config_subdir="experiment",
+            config_dir=Path(self.config_dir).parent,
+        )
+        self.assertEqual(Path(files[0]).name, self.config_file)
+        self.assertEqual(Path(files[0]).parent.name, Path(self.config_dir).name)
+
+    def tearDown(self) -> None:
+        rmtree(self.dir)
+
+
+class testGetOverrides(unittest.TestCase):
+    file = "evasion.yaml"
+    overrides = ["++data.sample.random_state=420"]
+    config_dir = Path(this_dir, "../conf/experiment").resolve().as_posix()
+
+    def setUp(self):
+        with initialize_config_dir(
+            config_dir=Path(self.config_dir).resolve().as_posix(),
+            version_base="1.3",
+        ):
+            cfg = compose(config_name=self.file)
+        self.cfg = cfg
+
+    def test_override(self):
+        overrides = get_overrides(
+            file=self.file,
+            folder=self.config_dir,
+            overrides=self.overrides,
+        )
+        with initialize_config_dir(
+            config_dir=Path(self.config_dir).resolve().as_posix(),
+            version_base="1.3",
+        ):
+            cfg = compose(config_name=self.file, overrides=overrides)
+        self.assertEqual(cfg.data.sample.random_state, 420)
+
+
+class testGetOverridesFromString(testGetOverrides):
+    file = "evasion.yaml"
+    overrides = "++data.sample.random_state=420"
+    config_dir = Path(this_dir, "../conf/experiment").resolve().as_posix()
+
+
+class testComposeExperiment(unittest.TestCase):
+    file = "evasion.yaml"
+    overrides = ["data.sample.random_state=420", "data.sample.train_size=100"]
+    config_dir = Path(this_dir, "../conf/experiment").resolve().as_posix()
+
+    def setUp(self):
+        with initialize_config_dir(
+            config_dir=Path(self.config_dir).resolve().as_posix(),
+            version_base="1.3",
+        ):
+            cfg = compose(config_name=self.file)
+        self.cfg = cfg
+
+    def test_compose(self):
+        exp = compose_experiment(
+            file=self.file,
+            config_dir=self.config_dir,
+            overrides=self.overrides,
+            default_file=self.file,
+        )
+        self.assertEqual(exp.data.sample.random_state, 420)
+
+
+class testSaveParamsFile(unittest.TestCase):
+    file = "evasion.yaml"
+    overrides = ["++data.sample.random_state=420"]
+    config_dir = Path(this_dir, "../conf/experiment").resolve().as_posix()
+    params_file = "params.yaml"
+    dir = mkdtemp()
+    params_file = Path(dir, params_file)
+
+    def setUp(self):
+        with initialize_config_dir(
+            config_dir=Path(self.config_dir).resolve().as_posix(),
+            version_base="1.3",
+        ):
+            cfg = compose(config_name=self.file)
+        self.cfg = cfg
+
+    def test_save(self):
+        save_params_file(
+            config_dir=self.config_dir,
+            config_file=self.file,
+            overrides=self.overrides,
+            params_file=self.params_file,
+        )
+        self.assertTrue(Path(self.params_file).exists())
+
+    def tearDown(self) -> None:
+        rmtree(self.dir)
diff --git a/test/pipelines/.dvc/.gitignore b/test/pipelines/.dvc/.gitignore
new file mode 100644
index 00000000..528f30c7
--- /dev/null
+++ b/test/pipelines/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/test/pipelines/.dvc/config b/test/pipelines/.dvc/config
new file mode 100644
index 00000000..e69de29b
diff --git a/test/pipelines/.dvcignore b/test/pipelines/.dvcignore
new file mode 100644
index 00000000..51973055
--- /dev/null
+++ b/test/pipelines/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/test/pipelines/evasion/.dvc/.gitignore b/test/pipelines/evasion/.dvc/.gitignore
new file mode 100644
index 00000000..528f30c7
--- /dev/null
+++ b/test/pipelines/evasion/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/test/pipelines/evasion/.dvc/config b/test/pipelines/evasion/.dvc/config
new file mode 100644
index 00000000..e69de29b
diff --git a/test/pipelines/evasion/.dvcignore b/test/pipelines/evasion/.dvcignore
new file mode 100644
index 00000000..51973055
--- /dev/null
+++ b/test/pipelines/evasion/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/test/pipelines/evasion/dvc.lock b/test/pipelines/evasion/dvc.lock
new file mode 100644
index 00000000..3683adf3
--- /dev/null
+++ b/test/pipelines/evasion/dvc.lock
@@ -0,0 +1,233 @@
+schema: '2.0'
+stages:
+  train:
+    cmd: python -m deckard.layers.experiment train
+    params:
+      params.yaml:
+        data:
+          generate:
+            n_features: 20
+            n_samples: 2000
+            name: classification
+            random_state: 0
+          sample:
+            random_state: 0
+            stratify: true
+        files:
+          _target_: deckard.base.files.FileConfig
+          adv_losses_file: adv_losses.json
+          adv_predictions_file: adv_predictions.json
+          adv_probabilities_file: adv_probabilities.json
+          attack_dir: attack
+          attack_file: attack
+          attack_type: .pkl
+          data_dir: data
+          data_file: data
+          data_type: .pkl
+          directory: output
+          losses_file: losses.json
+          model_dir: model
+          model_file: model
+          model_type: .pkl
+          name: evasion
+          predictions_file: predictions.json
+          probabilities_file: probabilities.json
+          reports: report
+          stage: ???
+          score_dict_file: score_dict.json
+        model:
+          art:
+            library: sklearn
+            pipeline:
+              initialize:
+                _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+          data:
+            generate:
+              n_features: 20
+              n_samples: 2000
+              name: classification
+              random_state: 0
+            sample:
+              random_state: 0
+              stratify: true
+          init:
+            C: 1.0
+            kernel: rbf
+            name: sklearn.svm.SVC
+            probability: true
+          library: sklearn
+        scorers:
+          _target_: deckard.base.scorer.ScorerDict
+          accuracy:
+            direction: maximize
+            name: sklearn.metrics.accuracy_score
+            normalize: true
+          log_loss:
+            direction: minimize
+            name: sklearn.metrics.log_loss
+            normalize: true
+    outs:
+    - path: output/data/data.pkl
+      hash: md5
+      md5: 041e10899ad1a52e1c7afe4b8916efad
+      size: 336313
+    - path: output/model/model.pkl
+      hash: md5
+      md5: a5bfc655087882ec2322526fe6427794
+      size: 74196
+    - path: output/report/train/evasion/predictions.json
+      hash: md5
+      md5: 1c83accb1ad8df240177f5f17f85424b
+      size: 17700
+    - path: output/report/train/evasion/score_dict.json
+      hash: md5
+      md5: 6e1d64d1acf2b879ab27d07b51715de8
+      size: 854
+  attack:
+    cmd: python -m deckard.layers.experiment attack
+    deps:
+    - path: output/data/data.pkl
+      hash: md5
+      md5: 041e10899ad1a52e1c7afe4b8916efad
+      size: 336313
+    - path: output/model/model.pkl
+      hash: md5
+      md5: a5bfc655087882ec2322526fe6427794
+      size: 74196
+    params:
+      params.yaml:
+        attack:
+          attack_size: 10
+          data:
+            generate:
+              n_features: 20
+              n_samples: 2000
+              name: classification
+              random_state: 0
+            sample:
+              random_state: 0
+              stratify: true
+          init:
+            batch_size: 64
+            init_eval: 10
+            max_eval: 10
+            max_iter: 10
+            model:
+              art:
+                library: sklearn
+                pipeline:
+                  initialize:
+                    _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+              data:
+                generate:
+                  n_features: 20
+                  n_samples: 2000
+                  name: classification
+                  random_state: 0
+                sample:
+                  random_state: 0
+                  stratify: true
+              init:
+                C: 1.0
+                kernel: rbf
+                name: sklearn.svm.SVC
+                probability: true
+              library: sklearn
+            name: art.attacks.evasion.HopSkipJump
+          method: evasion
+          model:
+            art:
+              library: sklearn
+              pipeline:
+                initialize:
+                  _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+            data:
+              generate:
+                n_features: 20
+                n_samples: 2000
+                name: classification
+                random_state: 0
+              sample:
+                random_state: 0
+                stratify: true
+            init:
+              C: 1.0
+              kernel: rbf
+              name: sklearn.svm.SVC
+              probability: true
+            library: sklearn
+        data:
+          generate:
+            n_features: 20
+            n_samples: 2000
+            name: classification
+            random_state: 0
+          sample:
+            random_state: 0
+            stratify: true
+        files:
+          _target_: deckard.base.files.FileConfig
+          adv_losses_file: adv_losses.json
+          adv_predictions_file: adv_predictions.json
+          adv_probabilities_file: adv_probabilities.json
+          attack_dir: attack
+          attack_file: attack
+          attack_type: .pkl
+          data_dir: data
+          data_file: data
+          data_type: .pkl
+          directory: output
+          losses_file: losses.json
+          model_dir: model
+          model_file: model
+          model_type: .pkl
+          name: evasion
+          predictions_file: predictions.json
+          probabilities_file: probabilities.json
+          reports: report
+          stage: ???
+          score_dict_file: score_dict.json
+        model:
+          art:
+            library: sklearn
+            pipeline:
+              initialize:
+                _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+          data:
+            generate:
+              n_features: 20
+              n_samples: 2000
+              name: classification
+              random_state: 0
+            sample:
+              random_state: 0
+              stratify: true
+          init:
+            C: 1.0
+            kernel: rbf
+            name: sklearn.svm.SVC
+            probability: true
+          library: sklearn
+        scorers:
+          _target_: deckard.base.scorer.ScorerDict
+          accuracy:
+            direction: maximize
+            name: sklearn.metrics.accuracy_score
+            normalize: true
+          log_loss:
+            direction: minimize
+            name: sklearn.metrics.log_loss
+            normalize: true
+    outs:
+    - path: output/attack/attack.pkl
+      hash: md5
+      md5: c45edebdd505e3879d60322f779d4cd0
+      size: 952
+    - path: output/report/attack/evasion/adv_predictions.json
+      hash: md5
+      md5: 7cae7b08d059d1e2e8c1b718fd0ba028
+      size: 421
+    - path: output/report/attack/evasion/score_dict.json
+      hash: md5
+      md5: 9cb7f317bc4f6b2198f6088e25ccefc4
+      size: 1134
diff --git a/test/pipelines/evasion/dvc.yaml b/test/pipelines/evasion/dvc.yaml
new file mode 100644
index 00000000..e1175ba3
--- /dev/null
+++ b/test/pipelines/evasion/dvc.yaml
@@ -0,0 +1,119 @@
+stages:
+  train:
+    cmd: python -m deckard.layers.experiment train
+    params:
+    - data
+    - model
+    - scorers
+    - files
+    outs:
+    - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type}
+    - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type}
+    # - ${files.directory}/${files.reports}/train/${files.name}/${files.params_file}
+    # - ${files.directory}/${files.reports}/train/${files.name}/${files.test_labels_file} # Omit to save space
+    - ${files.directory}/${files.reports}/train/${files.name}/${files.predictions_file} # logit outputs for our model
+    # - ${files.directory}/${files.reports}/train/${files.name}/${files.probabilities_file} # Omit to save space
+    metrics:
+    - ${files.directory}/${files.reports}/train/${files.name}/${files.score_dict_file}
+  attack:
+    cmd: python -m deckard.layers.experiment attack
+    params:
+    - data
+    - model
+    - attack
+    - scorers
+    - files
+    outs:
+    - ${files.directory}/${files.attack_dir}/${files.attack_file}${files.attack_type}
+    - ${files.directory}/${files.reports}/attack/${files.name}/${files.adv_predictions_file}
+    # - ${files.directory}/${files.reports}/attack/${files.name}/${files.params_file}
+    deps:
+    - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type}
+    - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type}
+    metrics:
+    - ${files.directory}/${files.reports}/attack/${files.name}/${files.score_dict_file}
+
+  ##############################################################################
+  # models: # This is a loop over the ResNet models
+  #   foreach:
+  #   - ResNet18
+  #   # - ResNet34
+  #   # - ResNet50
+  #   # - ResNet101
+  #   # - ResNet152
+  #   do: # This script configures each defence
+  #     cmd: bash models.sh ++model.init.name=torch_example.${item} stage=train ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/train/${item}.db --config-name mnist.yaml
+  #     deps:
+  #     - models.sh
+  #     - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type}
+  #     - ${files.directory}/${files.model_dir}/${files.model_file}.optimizer${files.model_type}
+  #     outs:
+  #     - ${files.directory}/${files.reports}/train/${item}.db: # This outputs a database file for each model
+  #         cache: True
+  #         persist: True
+  # attacks:
+  #   foreach: # This is a loop over the ResNet models
+  #   - ResNet18
+  #   - ResNet34
+  #   - ResNet50
+  #   - ResNet101
+  #   - ResNet152
+  #   do:
+  #     cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.${item} stage=attack ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/attack/${item}.db --config-name mnist.yaml
+  #     deps:
+  #     - models.sh # This script configures each defence
+  #     - attacks.sh # This script configures each attack
+  #     - ${files.directory}/${files.reports}/attack/${files.name}/${files.score_dict_file} # This is here just to ensure it runs after the attack stage
+  #     # - ${files.directory}/${files.reports}/train/${item}.db
+  #     outs:
+  #     - ${files.directory}/${files.reports}/attack/${item}.db: # This outputs a database file for each model
+  #         cache: True
+  #         persist: True
+  # compile:
+  #   foreach: # iterates through each stage
+  #   # - train
+  #   - attack
+  #   do:
+  #     cmd: python -m deckard.layers.compile --report_folder ${files.directory}/${files.reports}/${item} --results_file ${files.directory}/${files.reports}/${item}.csv
+  #     deps:
+  #     - ${files.directory}/${files.reports}/${item}/
+  #     - ${files.directory}/${files.reports}/${item}/ResNet18.db
+  #     - ${files.directory}/${files.reports}/${item}/ResNet34.db
+  #     - ${files.directory}/${files.reports}/${item}/ResNet50.db
+  #     - ${files.directory}/${files.reports}/${item}/ResNet101.db
+  #     # - ${files.directory}/${files.reports}/${item}/ResNet152.db
+  #     outs:
+  #     - ${files.directory}/${files.reports}/${item}.csv
+  # plot:
+  #   cmd: python -m deckard.layers.plots --path ${files.directory}/plots/ --file ${files.directory}/${files.reports}/attack.csv -o data.csv
+  #   deps:
+  #   - ${files.directory}/${files.reports}/attack.csv
+  #   - ${files.directory}/${files.reports}/attack/ResNet18.db
+  #   - ${files.directory}/${files.reports}/attack/ResNet34.db
+  #   - ${files.directory}/${files.reports}/attack/ResNet50.db
+  #   - ${files.directory}/${files.reports}/attack/ResNet101.db
+  #   - ${files.directory}/${files.reports}/attack/ResNet152.db
+  #   outs:
+  #   - ${files.directory}/plots/data.csv
+  # afr:
+  #   cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/plots/data.csv --target adv_accuracy --duration_col adv_fit_time --dataset mnist
+  #   deps:
+  #   - ${files.directory}/plots/data.csv
+  #   plots:
+  #   - ${files.directory}/plots/weibull_aft.pdf
+  #   - ${files.directory}/plots/weibull_partial_effects.pdf
+  #   - ${files.directory}/plots/cox_partial_effects.pdf
+  #   - ${files.directory}/plots/cox_aft.pdf
+  #   - ${files.directory}/plots/log_logistic_aft.pdf
+  #   - ${files.directory}/plots/log_logistic_partial_effects.pdf
+  #   - ${files.directory}/plots/log_normal_aft.pdf
+  #   - ${files.directory}/plots/log_normal_partial_effects.pdf
+  #   metrics:
+  #   - ${files.directory}/plots/aft_comparison.csv
+  #   outs:
+  #   - ${files.directory}/plots/aft_comparison.tex
+  # copy_results:
+  #   cmd: cp -r ${files.directory}/plots/* ~/ml_afr/mnist/
+  #   deps:
+  #   - ${files.directory}/plots/data.csv
+  #   - ${files.directory}/plots/aft_comparison.csv
diff --git a/test/pipelines/evasion/output/report/attack/evasion/params.yaml b/test/pipelines/evasion/output/report/attack/evasion/params.yaml
new file mode 100644
index 00000000..7598db9f
--- /dev/null
+++ b/test/pipelines/evasion/output/report/attack/evasion/params.yaml
@@ -0,0 +1,125 @@
+_target_: deckard.base.experiment.Experiment
+attack:
+  attack_size: 10
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    batch_size: 64
+    init_eval: 10
+    max_eval: 10
+    max_iter: 10
+    model:
+      art:
+        library: sklearn
+        pipeline:
+          initialize:
+            _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+      data:
+        generate:
+          n_features: 20
+          n_samples: 2000
+          name: classification
+          random_state: 0
+        sample:
+          random_state: 0
+          stratify: true
+      init:
+        C: 1.0
+        kernel: rbf
+        name: sklearn.svm.SVC
+        probability: true
+      library: sklearn
+    name: art.attacks.evasion.HopSkipJump
+  method: evasion
+  model:
+    art:
+      library: sklearn
+      pipeline:
+        initialize:
+          _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+    data:
+      generate:
+        n_features: 20
+        n_samples: 2000
+        name: classification
+        random_state: 0
+      sample:
+        random_state: 0
+        stratify: true
+    init:
+      C: 1.0
+      kernel: rbf
+      name: sklearn.svm.SVC
+      probability: true
+    library: sklearn
+data:
+  generate:
+    n_features: 20
+    n_samples: 2000
+    name: classification
+    random_state: 0
+  sample:
+    random_state: 0
+    stratify: true
+files:
+  _target_: deckard.base.files.FileConfig
+  adv_losses_file: adv_losses.json
+  adv_predictions_file: adv_predictions.json
+  adv_probabilities_file: adv_probabilities.json
+  attack_dir: attack
+  attack_file: attack
+  attack_type: .pkl
+  data_dir: data
+  data_file: data
+  data_type: .pkl
+  directory: output
+  losses_file: losses.json
+  model_dir: model
+  model_file: model
+  model_type: .pkl
+  name: evasion
+  predictions_file: predictions.json
+  probabilities_file: probabilities.json
+  reports: report
+  score_dict_file: score_dict.json
+  stage: &id001
+  - attack
+model:
+  art:
+    library: sklearn
+    pipeline:
+      initialize:
+        _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    C: 1.0
+    kernel: rbf
+    name: sklearn.svm.SVC
+    probability: true
+  library: sklearn
+scorers:
+  _target_: deckard.base.scorer.ScorerDict
+  accuracy:
+    direction: maximize
+    name: sklearn.metrics.accuracy_score
+    normalize: true
+  log_loss:
+    direction: minimize
+    name: sklearn.metrics.log_loss
+    normalize: true
+stage: *id001
diff --git a/test/pipelines/evasion/output/report/train/evasion/params.yaml b/test/pipelines/evasion/output/report/train/evasion/params.yaml
new file mode 100644
index 00000000..057eba71
--- /dev/null
+++ b/test/pipelines/evasion/output/report/train/evasion/params.yaml
@@ -0,0 +1,65 @@
+_target_: deckard.base.experiment.Experiment
+data:
+  generate:
+    n_features: 20
+    n_samples: 2000
+    name: classification
+    random_state: 0
+  sample:
+    random_state: 0
+    stratify: true
+files:
+  _target_: deckard.base.files.FileConfig
+  adv_losses_file: adv_losses.json
+  adv_predictions_file: adv_predictions.json
+  adv_probabilities_file: adv_probabilities.json
+  attack_dir: attack
+  attack_file: attack
+  attack_type: .pkl
+  data_dir: data
+  data_file: data
+  data_type: .pkl
+  directory: output
+  losses_file: losses.json
+  model_dir: model
+  model_file: model
+  model_type: .pkl
+  name: evasion
+  predictions_file: predictions.json
+  probabilities_file: probabilities.json
+  reports: report
+  score_dict_file: score_dict.json
+  stage: &id001
+  - train
+model:
+  art:
+    library: sklearn
+    pipeline:
+      initialize:
+        _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    C: 1.0
+    kernel: rbf
+    name: sklearn.svm.SVC
+    probability: true
+  library: sklearn
+scorers:
+  _target_: deckard.base.scorer.ScorerDict
+  accuracy:
+    direction: maximize
+    name: sklearn.metrics.accuracy_score
+    normalize: true
+  log_loss:
+    direction: minimize
+    name: sklearn.metrics.log_loss
+    normalize: true
+stage: *id001
diff --git a/test/pipelines/evasion/params.yaml b/test/pipelines/evasion/params.yaml
new file mode 100644
index 00000000..c466b49c
--- /dev/null
+++ b/test/pipelines/evasion/params.yaml
@@ -0,0 +1,126 @@
+_target_: deckard.base.experiment.Experiment
+attack:
+  attack_size: 10
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    batch_size: 64
+    init_eval: 10
+    max_eval: 10
+    max_iter: 10
+    model:
+      art:
+        library: sklearn
+        pipeline:
+          initialize:
+            _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+      data:
+        generate:
+          n_features: 20
+          n_samples: 2000
+          name: classification
+          random_state: 0
+        sample:
+          random_state: 0
+          stratify: true
+      init:
+        C: 1.0
+        kernel: rbf
+        name: sklearn.svm.SVC
+        probability: true
+      library: sklearn
+    name: art.attacks.evasion.HopSkipJump
+  method: evasion
+  model:
+    art:
+      library: sklearn
+      pipeline:
+        initialize:
+          _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+    data:
+      generate:
+        n_features: 20
+        n_samples: 2000
+        name: classification
+        random_state: 0
+      sample:
+        random_state: 0
+        stratify: true
+    init:
+      C: 1.0
+      kernel: rbf
+      name: sklearn.svm.SVC
+      probability: true
+    library: sklearn
+data:
+  generate:
+    n_features: 20
+    n_samples: 2000
+    name: classification
+    random_state: 0
+  sample:
+    random_state: 0
+    stratify: true
+files:
+  _target_: deckard.base.files.FileConfig
+  adv_losses_file: adv_losses.json
+  adv_predictions_file: adv_predictions.json
+  adv_probabilities_file: adv_probabilities.json
+  attack_dir: attack
+  attack_file: attack
+  attack_type: .pkl
+  data_dir: data
+  data_file: data
+  data_type: .pkl
+  directory: output
+  losses_file: losses.json
+  model_dir: model
+  model_file: model
+  model_type: .pkl
+  name: evasion
+  predictions_file: predictions.json
+  probabilities_file: probabilities.json
+  reports: report
+  stage: '???'
+  score_dict_file: score_dict.json
+model:
+  art:
+    library: sklearn
+    pipeline:
+      initialize:
+        _target_: deckard.base.model.art_pipeline.ArtPipelineStage
+  data:
+    generate:
+      n_features: 20
+      n_samples: 2000
+      name: classification
+      random_state: 0
+    sample:
+      random_state: 0
+      stratify: true
+  init:
+    C: 1.0
+    kernel: rbf
+    name: sklearn.svm.SVC
+    probability: true
+  library: sklearn
+optimizers:
+- accuracy
+- train_time
+scorers:
+  _target_: deckard.base.scorer.ScorerDict
+  accuracy:
+    direction: maximize
+    name: sklearn.metrics.accuracy_score
+    normalize: true
+  log_loss:
+    direction: minimize
+    name: sklearn.metrics.log_loss
+    normalize: true