diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 3c4d6bf5..1cfd65ea 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -7,4 +7,6 @@ jobs: - uses: actions/checkout@v2 - uses: psf/black@stable with: + options: "--check --verbose" src: "deckard/" + jupyter: true diff --git a/.gitignore b/.gitignore index b6774923..680aabf0 100644 --- a/.gitignore +++ b/.gitignore @@ -126,3 +126,19 @@ deckard/deckard.egg-info/* *log.txt *.hydra + + +# envs +env/ + + +# random pdfs +*.pdf +# random pngs +*.png + +# screenlog +screenlog.* + +# tmp.py +tmp.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b74b0b3..4de900c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,36 +1,36 @@ repos: - - repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 # Use the ref you want to point at - hooks: - - id: check-builtin-literals - - id: check-case-conflict - - id: check-symlinks - - id: check-toml - - id: detect-private-key - - id: end-of-file-fixer - - id: check-yaml - args : ['--unsafe'] - - repo: https://github.com/hadialqattan/pycln - rev: v2.1.1 # Possible releases: https://github.com/hadialqattan/pycln/releases - hooks: - - id: pycln - args: [deckard/] - - repo: https://github.com/pycqa/flake8 - rev: '5.0.4' # pick a git hash / tag to point to - hooks: - - id: flake8 - exclude: __init__.py - args: [--ignore=E501 W503] - - repo: https://github.com/psf/black - rev: 22.8.0 - hooks: - - id: black +- repo: https://github.com/asottile/add-trailing-comma + rev: v3.1.0 + hooks: + - id: add-trailing-comma +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 # Use the ref you want to point at + hooks: + - id: check-builtin-literals + - id: check-case-conflict + - id: check-symlinks + - id: check-toml + - id: detect-private-key + - id: end-of-file-fixer + - id: 
check-yaml + args: [--unsafe] +- repo: https://github.com/hadialqattan/pycln + rev: v2.4.0 # Possible releases: https://github.com/hadialqattan/pycln/releases + hooks: + - id: pycln + args: [deckard/] +- repo: https://github.com/psf/black + rev: 24.2.0 + hooks: + - id: black # It is recommended to specify the latest version of Python # supported by your project here, or alternatively use # pre-commit's default_language_version, see # https://pre-commit.com/#top_level-default_language_version - language_version: python3 + language_version: python3 +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 # pick a git hash / tag to point to + hooks: + - id: flake8 + exclude: __init__.py + args: [--ignore=E501 W503] diff --git a/Dockerfile b/Dockerfile index 38e07f93..80fd2bee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,8 +6,6 @@ RUN python3 -m pip install nvidia-pyindex nvidia-cuda-runtime-cu11 RUN git clone https://github.com/simplymathematics/deckard.git WORKDIR /deckard RUN python3 -m pip install --editable . -RUN python3 -m pip install pytest torch torchvision tensorflow RUN git clone https://github.com/Trusted-AI/adversarial-robustness-toolbox.git RUN cd adversarial-robustness-toolbox && python3 -m pip install . 
RUN apt install python-is-python3 -RUN pytest test diff --git a/deckard/__init__.py b/deckard/__init__.py index 960c1a33..9d6190ae 100644 --- a/deckard/__init__.py +++ b/deckard/__init__.py @@ -46,7 +46,7 @@ }, }, "loggers": { - "deckard": {"handlers": ["default"]}, + "deckard": {"handlers": ["default"], "level": "INFO", "propagate": True}, "tests": {"handlers": ["test"], "level": "DEBUG", "propagate": True}, }, } diff --git a/deckard/__main__.py b/deckard/__main__.py index ce7b27e4..cbd1505f 100644 --- a/deckard/__main__.py +++ b/deckard/__main__.py @@ -1,5 +1,4 @@ -""""Runs a submodule passed as an arg.""" - +#!/usr/bin/env python3 import argparse import subprocess import logging @@ -44,9 +43,11 @@ def parse_and_repro(args, default_config="default.yaml", config_dir="conf"): if len(args) == 0: assert ( save_params_file( - config_dir=Path(Path(), config_dir) - if not Path(config_dir).is_absolute() - else Path(config_dir), + config_dir=( + Path(Path(), config_dir) + if not Path(config_dir).is_absolute() + else Path(config_dir) + ), config_file=default_config, ) is None diff --git a/deckard/base/attack/attack.py b/deckard/base/attack/attack.py index d7014503..976fab63 100644 --- a/deckard/base/attack/attack.py +++ b/deckard/base/attack/attack.py @@ -8,6 +8,9 @@ from omegaconf import DictConfig, OmegaConf from hydra.utils import instantiate from art.utils import to_categorical, compute_success +from sklearn.utils.validation import check_is_fitted +from sklearn.base import BaseEstimator +from sklearn.exceptions import NotFittedError from random import randint from ..data import Data from ..model import Model @@ -117,7 +120,13 @@ class EvasionAttack: kwargs: Union[dict, None] = field(default_factory=dict) def __init__( - self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs + self, + name: str, + data: Data, + model: Model, + init: dict, + attack_size=-1, + **kwargs, ): self.name = name self.data = data @@ -148,6 +157,10 @@ def __call__( if 
attack_file is not None and Path(attack_file).exists(): samples = self.data.load(attack_file) else: + print(f"Type of self.init: {type(self.init)}") + print(f"Type of self.init.model: {type(self.init.model)}") + print(f"Type of model: {type(model)}") + atk = self.init(model=model, attack_size=self.attack_size) if targeted is True: @@ -466,7 +479,13 @@ class InferenceAttack: kwargs: Union[dict, None] = field(default_factory=dict) def __init__( - self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs + self, + name: str, + data: Data, + model: Model, + init: dict, + attack_size=-1, + **kwargs, ): self.name = name self.data = data @@ -577,7 +596,13 @@ class ExtractionAttack: kwargs: Union[dict, None] = field(default_factory=dict) def __init__( - self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs + self, + name: str, + data: Data, + model: Model, + init: dict, + attack_size=-1, + **kwargs, ): self.name = name self.data = data @@ -798,12 +823,21 @@ def __call__( adv_predictions_file=None, adv_probabilities_file=None, adv_losses_file=None, + **kwargs, ): name = self.init.name kwargs = deepcopy(self.kwargs) kwargs.update({"init": self.init.kwargs}) data = self.data() data, model = self.model.initialize(data) + if isinstance(model, BaseEstimator): + try: + check_is_fitted(model), "Model must be fitted before calling attack." + except NotFittedError as e: + logger.warning( + f"Model not fitted. Fitting model before attack. 
Error: {e}", + ) + model, _ = self.model.fit(data=data, model=model) if "art" not in str(type(model)): model = self.model.art(model=model, data=data) if self.method == "evasion": diff --git a/deckard/base/data/data.py b/deckard/base/data/data.py index 690d27c0..7b1859ea 100644 --- a/deckard/base/data/data.py +++ b/deckard/base/data/data.py @@ -4,10 +4,10 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Union - import numpy as np from pandas import DataFrame, read_csv, Series - +from omegaconf import OmegaConf +from validators import url from ..utils import my_hash from .generator import DataGenerator from .sampler import SklearnDataSampler @@ -28,6 +28,7 @@ class Data: ) target: Union[str, None] = None name: Union[str, None] = None + drop: list = field(default_factory=list) def __init__( self, @@ -36,6 +37,8 @@ def __init__( sample: SklearnDataSampler = None, sklearn_pipeline: SklearnDataPipeline = None, target: str = None, + drop: list = [], + **kwargs, ): """Initialize the data object. If the data is generated, then generate the data and sample it. If the data is loaded, then load the data and sample it. @@ -46,9 +49,6 @@ def __init__( sklearn_pipeline (SklearnDataPipeline, optional): The sklearn pipeline. Defaults to None. target (str, optional): The target column. Defaults to None. 
""" - logger.info( - f"Instantiating {self.__class__.__name__} with name={name} and generate={generate} and sample={sample} and sklearn_pipeline={sklearn_pipeline} and target={target}", - ) if generate is not None: self.generate = ( generate @@ -66,16 +66,19 @@ def __init__( else: self.sample = SklearnDataSampler() if sklearn_pipeline is not None: + sklearn_pipeline = OmegaConf.to_container( + OmegaConf.create(sklearn_pipeline), + ) self.sklearn_pipeline = ( sklearn_pipeline - if isinstance(sklearn_pipeline, (SklearnDataPipeline, type(None))) + if isinstance(sklearn_pipeline, (SklearnDataPipeline)) else SklearnDataPipeline(**sklearn_pipeline) ) else: self.sklearn_pipeline = None + self.drop = drop self.target = target self.name = name if name is not None else my_hash(self) - logger.debug(f"Instantiating Data with id: {self.get_name()}") def get_name(self): """Get the name of the data object.""" @@ -91,7 +94,6 @@ def initialize(self, filename=None): """ if filename is not None and Path(filename).exists(): result = self.load(filename) - assert len(result) == 4, f"Data is not generated: {self.name}" elif self.generate is not None: result = self.generate() else: @@ -100,14 +102,23 @@ def initialize(self, filename=None): assert self.target is not None, "Target is not specified" y = result[self.target] X = result.drop(self.target, axis=1) - X = np.array(X) - y = np.array(y) + if self.drop != []: + X = X.drop(self.drop, axis=1) + X = X.to_numpy() + y = y.to_numpy() result = [X, y] + else: + if self.drop != []: + raise ValueError( + f"Drop is not supported for non-DataFrame data. Data is type {type(result)}", + ) if len(result) == 2: result = self.sample(*result) assert ( len(result) == 4 ), f"Data is not generated: {self.name} {result}. 
Length: {len(result)}," + if self.sklearn_pipeline is not None: + result = self.sklearn_pipeline(*result) return result def load(self, filename) -> DataFrame: @@ -125,6 +136,8 @@ def load(self, filename) -> DataFrame: elif suffix in [".pkl", ".pickle"]: with open(filename, "rb") as f: data = pickle.load(f) + elif suffix in [".npz"]: + data = np.load(filename) else: # pragma: no cover raise ValueError(f"Unknown file type {suffix}") return data @@ -138,6 +151,10 @@ def save(self, data, filename): logger.info(f"Saving data to {filename}") suffix = Path(filename).suffix Path(filename).parent.mkdir(parents=True, exist_ok=True) + if isinstance(data, dict): + for k, v in data.items(): + v = str(v) + data[k] = v if suffix in [".json"]: if isinstance(data, (Series, DataFrame)): data = data.to_dict() @@ -155,16 +172,20 @@ def save(self, data, filename): else: # pragma: no cover raise ValueError(f"Unknown data type {type(data)} for {filename}.") with open(filename, "w") as f: - json.dump(data, f) + json.dump(data, f, indent=4, sort_keys=True) elif suffix in [".csv"]: assert isinstance( data, (Series, DataFrame, dict, np.ndarray), ), f"Data must be a Series, DataFrame, or dict, not {type(data)} to save to {filename}" - DataFrame(data).to_csv(filename, index=False) + if isinstance(data, (np.ndarray)): + data = DataFrame(data) + data.to_csv(filename, index=False) elif suffix in [".pkl", ".pickle"]: with open(filename, "wb") as f: pickle.dump(data, f) + elif suffix in [".npz"]: + np.savez(filename, data) else: # pragma: no cover raise ValueError(f"Unknown file type {type(suffix)} for {suffix}") assert Path(filename).exists() @@ -174,19 +195,19 @@ def __call__( data_file=None, train_labels_file=None, test_labels_file=None, + **kwargs, ) -> list: """Loads data from file if it exists, otherwise generates data and saves it to file. Returns X_train, X_test, y_train, y_test as a list of arrays, typed according to the framework. 
:param filename: str :return: list """ - result_dict = {} - if data_file is not None and Path(data_file).exists(): - data = self.load(data_file) - assert len(data) == 4, f"Some data is missing: {self.name}" + if Path(self.name).is_file() or url(self.name): + new_data_file = data_file + data_file = self.name else: - data = self.initialize(filename=data_file) - assert len(data) == 4, f"Some data is missing: {self.name}" - data_file = self.save(data, data_file) + new_data_file = data_file + result_dict = {} + data = self.initialize(data_file) result_dict["data"] = data if train_labels_file is not None: self.save(data[2], train_labels_file) @@ -198,4 +219,6 @@ def __call__( assert Path( test_labels_file, ).exists(), f"Error saving test labels to {test_labels_file}" + if new_data_file is not None: + self.save(data, new_data_file) return data diff --git a/deckard/base/data/generator.py b/deckard/base/data/generator.py index 4283ed22..b9494756 100644 --- a/deckard/base/data/generator.py +++ b/deckard/base/data/generator.py @@ -1,6 +1,6 @@ import logging -from typing import Literal +from typing import Literal, Callable, Union from dataclasses import dataclass, field from pathlib import Path import numpy as np @@ -11,16 +11,19 @@ make_moons, make_circles, ) +from torchvision.io import read_image, read_file from art.utils import load_mnist, load_cifar10, load_diabetes, to_categorical from ..utils import my_hash +logger = logging.getLogger(__name__) + + __all__ = [ "SklearnDataGenerator", "TorchDataGenerator", "KerasDataGenerator", "DataGenerator", ] -logger = logging.getLogger(__name__) SKLEARN_DATASETS = [ "classification", @@ -222,6 +225,8 @@ def __call__(self): return TorchDataGenerator(self.name, **self.kwargs)() elif self.name in KERAS_DATASETS: return KerasDataGenerator(self.name, **self.kwargs)() + elif isinstance(self.name, str) and Path(self.name).exists(): + return SklearnDataGenerator(self.name, **self.kwargs)() else: # pragma: no cover raise ValueError( f"Invalid 
name {self.name}. Please choose from {ALL_DATASETS}", @@ -229,3 +234,63 @@ def __call__(self): def __hash__(self): return int(my_hash(self), 16) + + +@dataclass +class TorchBaseLoader: + name: str = "data/" + labels: str = "labels.csv" + transform = Union[Callable, None] + target_transform = Union[Callable, None] + regex = "*" + + def __init__(self, name, labels, transform=None, target_transform=None, regex="*"): + self.name = name + self.labels = read_file(labels) + self.files = list(Path(self.name).glob(regex)) + self.transform = transform + self.target_transform = target_transform + self.regex = regex + assert len(self.files) > 0, f"No files found in {self.name} with regex {regex}" + assert len(self.files) == len( + self.labels, + ), f"Number of files {len(self.files)} does not match number of labels {len(self.labels)}" + + def __getitem__(self, idx): + raise NotImplementedError("This method is not implemented yet.") + + def __len__(self): + return len(self.files) + + def __call__(self): + for X, y in self: + yield X, y + + +@dataclass +class TorchImageLoader(TorchBaseLoader): + + def __getitem__(self, idx): + file_path = self.files[idx] + image = read_image(file_path) + label = self.labels[idx] + if self.transform: + image = self.transform(image) + if self.target_transform: + label = self.target_transform(label) + return image, label + + +@dataclass +class TorchTextLoader(TorchBaseLoader): + + def __getitem__(self, idx): + file_path = self.files[idx] + with file_path.open("r") as f: + text = f.read() + label = self.labels[idx] + if self.transform: + text = self.transform(text) + if self.target_transform: + label = self.target_transform(label) + return text, label diff --git a/deckard/base/data/sampler.py b/deckard/base/data/sampler.py index de1c33d0..9f95c6a1 100644 --- a/deckard/base/data/sampler.py +++ b/deckard/base/data/sampler.py @@ -3,11 +3,13 @@ from copy import deepcopy from typing import Union from sklearn.model_selection import train_test_split + from 
..utils import my_hash -__all__ = ["SklearnDataSampler"] logger = logging.getLogger(__name__) +__all__ = ["SklearnDataSampler"] + @dataclass class SklearnDataSampler: @@ -27,9 +29,6 @@ def __init__( stratify=False, time_series=False, ): - logger.info( - f"Instantiating {self.__class__.__name__} with params {asdict(self)}", - ) self.test_size = test_size self.train_size = train_size self.random_state = random_state diff --git a/deckard/base/data/sklearn_pipeline.py b/deckard/base/data/sklearn_pipeline.py index 0c725896..ccb73745 100644 --- a/deckard/base/data/sklearn_pipeline.py +++ b/deckard/base/data/sklearn_pipeline.py @@ -16,9 +16,6 @@ class SklearnDataPipelineStage: kwargs: dict = field(default_factory=dict) def __init__(self, name, **kwargs): - logger.info( - f"Instantiating {self.__class__.__name__} with name={name} and kwargs={kwargs}", - ) self.name = name self.kwargs = kwargs @@ -43,7 +40,10 @@ def __init__(self, **kwargs): pipe = kwargs.pop("pipeline", {}) pipe.update(**kwargs) for stage in pipe: - pipe[stage] = OmegaConf.to_container(pipe[stage], resolve=True) + pipe[stage] = OmegaConf.to_container( + OmegaConf.create(pipe[stage]), + resolve=True, + ) name = pipe[stage].pop("name", pipe[stage].pop("_target_", stage)) pipe[stage] = SklearnDataPipelineStage(name, **pipe[stage]) self.pipeline = pipe diff --git a/deckard/base/experiment/experiment.py b/deckard/base/experiment/experiment.py index 79d40f00..0f1828ed 100644 --- a/deckard/base/experiment/experiment.py +++ b/deckard/base/experiment/experiment.py @@ -29,7 +29,7 @@ class Experiment: name: Union[str, None] = field(default_factory=str) stage: Union[str, None] = field(default_factory=str) optimizers: Union[list, None] = field(default_factory=list) - device_id: str = None + device_id: str = "cpu" kwargs: Union[dict, None] = field(default_factory=dict) def __init__( @@ -38,7 +38,7 @@ def __init__( model: Model, scorers: ScorerDict, files: list, - device_id: str = None, + device_id: str = "cpu", attack: 
Attack = None, name=None, stage=None, @@ -84,7 +84,7 @@ def __init__( self.files = FileConfig(**files) elif isinstance(files, DictConfig): file_dict = OmegaConf.to_container(files, resolve=True) - self.files = FileConfig(**file_dict) + self.files = FileConfig(**file_dict, files=files) elif isinstance(files, FileConfig): self.files = files else: # pragma: no cover @@ -107,7 +107,6 @@ def __init__( self.optimizers = optimizers self.kwargs = kwargs self.name = name if name is not None else self._set_name() - logger.info("Instantiating Experiment with id: {}".format(self.get_name())) def __hash__(self): name = str(self.name).encode("utf-8") @@ -120,8 +119,6 @@ def __call__(self): :return: The score for the specified scorer or the status of the experiment if scorer=None (default). """ logger.info("Running experiment with id: {}".format(self.get_name())) - old_name = self.get_name() - old_hash = my_hash(self) # Setup files, data, and model files = deepcopy(self.files).get_filenames() @@ -141,28 +138,12 @@ def __call__(self): ######################################################################### # Load or generate data ######################################################################### - data_files = { - "data_file": files.get("data_file", None), - "train_labels_file": files.get("train_labels_file", None), - "test_labels_file": files.get("test_labels_file", None), - # "time_dict_file": files.get("score_dict_file", None), - # TODO data_score_file - } - data = self.data(**data_files) + data = self.data(**files) ######################################################################### # Load or train model ######################################################################### if self.model is not None: - model_files = { - "model_file": files.get("model_file", None), - "predictions_file": files.get("predictions_file", None), - "probabilities_file": files.get("probabilities_file", None), - "time_dict_file": files.get("score_dict_file", None), - "losses_file": 
files.get("losses_file", None), - # TODO train_score_file - # TODO test_score_file - } - model_results = self.model(data, **model_files) + model_results = self.model(data, **files) score_dict.update(**model_results.pop("time_dict", {})) score_dict.update(**model_results.pop("score_dict", {})) model = model_results["model"] @@ -186,7 +167,7 @@ def __call__(self): if not hasattr(losses, "shape"): losses = np.array(losses) logger.debug(f"losses shape: {losses.shape}") - else: # For experiments without models + else: # For experiments without models, e.g Mutual Information experiments on datasets preds = data[2] ########################################################################## # Load or run attack @@ -195,10 +176,7 @@ def __call__(self): adv_results = self.attack( data, model, - attack_file=files.get("attack_file", None), - adv_predictions_file=files.get("adv_predictions_file", None), - adv_probabilities_file=files.get("adv_probabilities_file", None), - adv_losses_file=files.get("adv_losses_file", None), + **files, ) if "adv_predictions" in adv_results: adv_preds = adv_results["adv_predictions"] @@ -260,13 +238,6 @@ def __call__(self): raise ValueError("Scorer is None. Please specify a scorer.") logger.info(f"Score for id : {self.get_name()}: {score_dict}") logger.info("Finished running experiment with id: {}".format(self.get_name())) - new_name = self.get_name() - assert ( - old_name == new_name - ), f"Experiment hash() name changed from {old_name} to {new_name}." 
- logger.debug( - f"Experiment deckard hash changed from {old_hash} to {my_hash(self)}.", - ) return score_dict def _set_name(self): diff --git a/deckard/base/files/files.py b/deckard/base/files/files.py index ba871710..da55a7c5 100644 --- a/deckard/base/files/files.py +++ b/deckard/base/files/files.py @@ -1,9 +1,9 @@ import logging from dataclasses import dataclass, field from pathlib import Path -from typing import Dict +from typing import Dict, Union from copy import deepcopy - +from omegaconf import OmegaConf from ..utils import my_hash logger = logging.getLogger(__name__) @@ -13,12 +13,12 @@ @dataclass class FileConfig: - reports: str = "reports" - data_dir: str = "data" - model_dir: str = "models" - attack_dir = "attacks" - name: str = None - stage: str = None + reports: Union[str, None] = "reports" + data_dir: Union[str, None] = "data" + model_dir: Union[str, None] = "models" + attack_dir: Union[str, None] = "attacks" + name: Union[str, None] = None + stage: Union[str, None] = None files: dict = field(default_factory=dict) def __init__( @@ -50,7 +50,7 @@ def __init__( :return: A FileConfig object. 
""" self._target_ = "deckard.base.files.FileConfig" - files.update(kwargs) + files = OmegaConf.merge(files, kwargs) self.reports = str(Path(reports).as_posix()) if reports is not None else None self.data_dir = str(Path(data_dir).as_posix()) if data_dir is not None else None self.model_dir = ( @@ -71,10 +71,13 @@ def __init__( if directory else None ) - self.name = name if name else None self.stage = stage if stage else None self.files = files if files else {} logger.debug(f"FileConfig init: {self.files}") + if name is None: + self.name = my_hash(self) + else: + self.name = name def __call__(self): files = dict(self.get_filenames()) diff --git a/deckard/base/model/art_pipeline.py b/deckard/base/model/art_pipeline.py index 6ac7d8b5..e21c0204 100644 --- a/deckard/base/model/art_pipeline.py +++ b/deckard/base/model/art_pipeline.py @@ -7,6 +7,7 @@ from omegaconf import DictConfig, OmegaConf import numpy as np from random import randint +from art.utils import to_categorical from .keras_models import KerasInitializer, keras_dict # noqa F401 from .tensorflow_models import ( # noqa F401 TensorflowV1Initializer, @@ -113,8 +114,22 @@ def __call__(self): and not isinstance(model, tuple(sklearn_dict.values())) ): model = SklearnModelInitializer( - data=data, model=model, library=library, **kwargs + data=data, + model=model, + library=library, + **kwargs, )() + try: + model.fit(data[0], data[2]) + except np.AxisError: + # Turn column vector, data[2], into a 2D array + nb_classes = len(np.unique(data[2])) if len(data[2]) > 2 else 2 + data[2] = to_categorical(data[2], nb_classes=nb_classes) + data[3] = to_categorical(data[3], nb_classes=nb_classes) + model.fit(data[0], data[2]) + if library in sklearn_dict and "art." 
not in str(type(model)): + est = sklearn_dict[library] + model = est(model, **kwargs) elif library in [ "tf2", "tensorflowv2", @@ -123,7 +138,10 @@ def __call__(self): "tfv2", ] and not isinstance(model, tuple(tensorflow_dict.values())): model = TensorflowV2Initializer( - data=data, model=model, library=library, **kwargs + data=data, + model=model, + library=library, + **kwargs, )() elif library in ["tf1", "tensorflowv1", "tfv1"] and not isinstance( model, diff --git a/deckard/base/model/keras_models.py b/deckard/base/model/keras_models.py index b0c4b5ca..ae1ca7f5 100644 --- a/deckard/base/model/keras_models.py +++ b/deckard/base/model/keras_models.py @@ -34,7 +34,6 @@ def __init__(self, name, **kwargs): def __call__(self): import tensorflow as tf - logger.info(f"Initializing model {self.name} with kwargs {self.kwargs}") if len(self.kwargs) > 0: config = {"class_name": self.name, "config": self.kwargs} else: @@ -128,7 +127,6 @@ def __init__(self, name, **kwargs): def __call__(self): import tensorflow as tf - logger.info(f"Initializing model {self.name} with kwargs {self.kwargs}") if len(self.kwargs) > 0: config = {"class_name": self.name, "config": self.kwargs} else: diff --git a/deckard/base/model/model.py b/deckard/base/model/model.py index 71e3de37..13d810f5 100644 --- a/deckard/base/model/model.py +++ b/deckard/base/model/model.py @@ -4,7 +4,7 @@ from pathlib import Path from time import process_time_ns, time from typing import Union, Dict -from omegaconf import OmegaConf, DictConfig, ListConfig +from omegaconf import OmegaConf, DictConfig from copy import deepcopy import numpy as np from sklearn.exceptions import NotFittedError @@ -42,50 +42,10 @@ def __init__(self, name, pipeline={}, **kwargs): def __call__(self): params = self.kwargs - logger.info(f"Initializing model {self.name} with kwargs {self.kwargs}") - if "input_dim" in params: - if isinstance(params["input_dim"], list): - params["input_dim"] = tuple(params["input_dim"]) - elif 
isinstance(params["input_dim"], int): - params["input_dim"] = params["input_dim"] - elif isinstance(params["input_dim"], ListConfig): - input_dim_list = tuple( - OmegaConf.to_container(params["input_dim"], resolve=True), - ) - if len(input_dim_list) == 1: - params["input_dim"] = input_dim_list[0] - else: - params["input_dim"] = tuple(input_dim_list) - else: # pragma: no cover - raise ValueError( - f"input_dim must be a list or tuple. Got {type(params['input_dim'])}", - ) - if "output_dim" in params: - if isinstance(params["output_dim"], list): - params["output_dim"] = tuple(params["output_dim"]) - elif isinstance(params["output_dim"], int): - params["output_dim"] = params["output_dim"] - elif isinstance(params["output_dim"], ListConfig): - output_dim_list = OmegaConf.to_container( - params["output_dim"], - resolve=True, - ) - if len(output_dim_list) == 1: - params["output_dim"] = output_dim_list[0] - else: - params["output_dim"] = tuple(output_dim_list) - else: # pragma: no cover - raise ValueError( - f"output_dim must be a list or tuple. 
Got {type(params['output_dim'])}", - ) name = params.pop("name", self.name) if self.pipeline is not None: pipeline = deepcopy(self.pipeline) obj = factory(name, **params) - # if isinstance(pipeline, DictConfig): - # pipeline = OmegaConf.to_container(pipeline, resolve=True) - # elif isinstance(pipeline, dict): - # pipeline = pipeline if is_dataclass(pipeline): pipeline = asdict(pipeline) else: # pragma: no cover @@ -107,7 +67,6 @@ class ModelTrainer: kwargs: dict = field(default_factory=dict) def __init__(self, **kwargs): - logger.info(f"Initializing model trainer with kwargs {kwargs}") self.kwargs = kwargs def __call__(self, data: list, model: object, library=None): @@ -129,13 +88,20 @@ def __call__(self, data: list, model: object, library=None): try: start = process_time_ns() start_timestamp = time() + logger.info(f"Fitting type(model): {type(model)} with kwargs {trainer}") model.fit(data[0], data[2], **trainer) end = process_time_ns() end_timestamp = time() except np.AxisError: # pragma: no cover from art.utils import to_categorical - data[2] = to_categorical(data[2]) + nb_classes = len(np.unique(data[2])) + if nb_classes < 2: + nb_classes = 2 + data[2] = np.squeeze(data[2]) + data[3] = np.squeeze(data[3]) + data[2] = to_categorical(data[2], nb_classes=nb_classes) + data[3] = to_categorical(data[3], nb_classes=nb_classes) start = process_time_ns() start_timestamp = time() model.fit(data[0], data[2], **trainer) @@ -146,6 +112,18 @@ def __call__(self, data: list, model: object, library=None): from art.utils import to_categorical nb_classes = len(np.unique(data[2])) + if nb_classes < 2: + nb_classes = 2 + data[2] = to_categorical(data[2], nb_classes=nb_classes) + data[3] = to_categorical(data[3], nb_classes=nb_classes) + start = process_time_ns() + start_timestamp = time() + model.fit(data[0], data[2], **trainer) + end = process_time_ns() + end_timestamp = time() + if "must be greater than or equal to 2" in str(e): + from art.utils import to_categorical + data[2] = 
to_categorical(data[2], nb_classes=nb_classes) start = process_time_ns() start_timestamp = time() @@ -167,7 +145,7 @@ def __call__(self, data: list, model: object, library=None): except Exception as e: raise e except RuntimeError as e: # pragma: no cover - if "eager mode" in str(e): + if "eager mode" in str(e) and library in tensorflow_dict.keys(): import tensorflow as tf tf.config.run_functions_eagerly(True) @@ -176,7 +154,9 @@ def __call__(self, data: list, model: object, library=None): model.fit(data[0], data[2], **trainer) end = process_time_ns() end_timestamp = time() - elif "should be the same" in str(e).lower(): + elif ( + "should be the same" in str(e).lower() and library in torch_dict.keys() + ): import torch device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -293,9 +273,6 @@ def __init__( else: self.art = None self.name = my_hash(self) if name is None else str(name) - logger.info( - f"Initializing model with data {self.data}, init {self.init}, trainer {self.trainer}, art {self.art}", - ) def __hash__(self): return int(my_hash(self), 16) @@ -310,20 +287,9 @@ def __call__( probabilities_file=None, time_dict_file=None, losses_file=None, + **kwargs, ): result_dict = {} - if isinstance(data, Data): - data = data.initialize(data_file) - elif isinstance(data, type(None)): - data = self.data.initialize(data_file) - elif isinstance(data, (str, Path)): - data = self.load(data) - assert isinstance( - data, - (type(None), list, tuple), - ), f"Data {data} is not a list. It is of type {type(data)}." - assert len(data) == 4, f"Data {data} is not a tuple of length 4." - result_dict["data"] = data if isinstance(model, Model): data, model = model.initialize(data) elif isinstance(model, type(None)): @@ -331,13 +297,38 @@ def __call__( assert len(data) == 4, f"Data {data} is not a tuple of length 4." 
elif isinstance(model, (str, Path)): model = self.load(model) + if isinstance(data, Data): + data = data(data_file=data_file) + elif isinstance(data, type(None)): + data = self.data.initialize(data_file) + elif isinstance(data, (str, Path)): + data = self.data.load(data) + else: + assert len(data) == 4, f"Data {data} is not a tuple of length 4." + assert isinstance( + data, + (list, tuple), + ), f"Data {data} is not a list. It is of type {type(data)}." elif hasattr(model, ("fit", "fit_generator")): assert hasattr(model, "predict") or hasattr( model, "predict_proba", ), f"Model {model} does not have a predict or predict_proba method." + if isinstance(data, Data): + data = data.initialize(data_file) + elif isinstance(data, type(None)): + data = self.data.initialize(data_file) + elif isinstance(data, (str, Path)): + data = self.load(data) + assert isinstance( + data, + (type(None), list, tuple), + ), f"Data {data} is not a list. It is of type {type(data)}." else: # pragma: no cover raise ValueError(f"Model {model} is not a valid model.") + assert len(data) == 4, f"Data {data} is not a tuple of length 4." + assert hasattr(model, "fit"), f"Model {model} does not have a fit method." 
+ result_dict["data"] = data result_dict["model"] = model if predictions_file is not None and Path(predictions_file).exists(): @@ -417,13 +408,7 @@ def __call__( result_dict["probabilities"] = probs result_dict["time_dict"].update(**prob_time_dict) else: - probs, prob_time_dict = self.predict_proba( - data=data, - model=model, - probabilities_file=probabilities_file, - ) - result_dict["probabilities"] = probs - result_dict["time_dict"].update(**prob_time_dict) + pass # Predicting loss if losses_file is not None: loss, loss_time_dict = self.predict_log_loss( @@ -438,14 +423,7 @@ def __call__( loss = self.data.load(losses_file) result_dict["losses"] = loss else: - loss, loss_time_dict = self.predict_log_loss( - data=data, - model=model, - losses_file=losses_file, - ) - time_dict.update(**loss_time_dict) - result_dict["losses"] = loss - result_dict["time_dict"].update(**loss_time_dict) + pass if time_dict_file is not None: if Path(time_dict_file).exists(): old_time_dict = self.data.load(time_dict_file) diff --git a/deckard/base/model/sklearn_pipeline.py b/deckard/base/model/sklearn_pipeline.py index 631b39ed..03318512 100644 --- a/deckard/base/model/sklearn_pipeline.py +++ b/deckard/base/model/sklearn_pipeline.py @@ -1,15 +1,11 @@ import logging -from sklearn.utils.validation import check_is_fitted from typing import Dict, Union from dataclasses import dataclass, asdict, field, is_dataclass from omegaconf import DictConfig, OmegaConf from hydra.utils import instantiate from copy import deepcopy -import numpy as np -from sklearn.exceptions import NotFittedError from sklearn.base import BaseEstimator from sklearn.pipeline import Pipeline - from art.estimators.classification.scikitlearn import ( ScikitlearnAdaBoostClassifier, ScikitlearnBaggingClassifier, @@ -25,7 +21,6 @@ ScikitlearnDecisionTreeRegressor, ScikitlearnRegressor, ) -from art.utils import to_categorical from ..utils import my_hash @@ -162,16 +157,16 @@ def __call__(self, model): for stage in pipeline: stage 
= pipeline[stage] if isinstance(stage, dict): - stage[ - "_target_" - ] = "deckard.base.model.sklearn_pipeline.SklearnModelPipelineStage" + stage["_target_"] = ( + "deckard.base.model.sklearn_pipeline.SklearnModelPipelineStage" + ) stage = instantiate(stage) model = stage(model=model) elif isinstance(stage, DictConfig): stage = OmegaConf.to_container(stage, resolve=True) - stage[ - "_target_" - ] = "deckard.base.model.sklearn_pipeline.SklearnModelPipelineStage" + stage["_target_"] = ( + "deckard.base.model.sklearn_pipeline.SklearnModelPipelineStage" + ) stage = instantiate(stage) model = stage(model=model) elif isinstance(stage, SklearnModelPipelineStage): @@ -222,10 +217,7 @@ def __init__(self, data, model=None, library="sklearn", pipeline={}, **kwargs): self.data = data self.model = model self.library = library - params = deepcopy(kwargs) - # while "kwargs" in params: - # params.update(**params.pop("kwargs")) - self.kwargs = params + self.kwargs = kwargs if len(pipeline) > 0: self.pipeline = SklearnModelPipeline(**pipeline) else: @@ -233,71 +225,43 @@ def __init__(self, data, model=None, library="sklearn", pipeline={}, **kwargs): def __call__(self): logger.debug(f"Initializing model {self.model} with kwargs {self.kwargs}") - data = self.data model = self.model - library = self.library - kwargs = {} - params = deepcopy(self.kwargs) - if "library" in kwargs: - library = kwargs.pop("library") - if "clip_values" in params: - clip_values = params.pop("clip_values") - kwargs["clip_values"] = tuple(clip_values) + if isinstance(model, BaseEstimator): + pass + elif isinstance(model, DictConfig): + model = OmegaConf.to_container(model, resolve=True) + elif isinstance(model, str): + model = {"name": model, **self.kwargs} + else: + assert "art." in str( + type(model), + ), f"model must be a string, dict, or sklearn estimator. 
Got {type(model)}" + if isinstance(model, dict): + if "name" in model: + name = model.pop("name") + else: + raise ValueError( + f"model must have a name attribute. Got {model}", + ) + model["_target_"] = name + model = instantiate(model) else: - X_train, _, _, _ = data - kwargs.update({"clip_values": (np.amin(X_train), np.amax(X_train))}) - if "preprocessing" not in params: - if len(data[0].shape) > 2: - mean = np.mean(data[0], axis=0) - std = np.std(data[0], axis=0) - pre_tup = (mean, std) + if hasattr(model, "model"): + assert isinstance( + model.model, + BaseEstimator, + ), f"model must be a sklearn estimator. Got {type(model.model)}" else: - pre_tup = (np.mean(data[0]), np.std(data[0])) - kwargs["preprocessing"] = pre_tup - if "preprocessing_defences" in params: - preprocessing_defences = params.pop("preprocessing_defences") - kwargs["preprocessing_defences"] = preprocessing_defences - if "postprocessing_defences" in params: - postprocessing_defences = params.pop("postprocessing_defences") - kwargs["postprocessing_defences"] = postprocessing_defences + assert isinstance( + model, + BaseEstimator, + ), f"model must be a sklearn estimator. Got {type(model)}" if self.pipeline is not None: - obj = self.pipeline(model) + model = self.pipeline(model) assert isinstance( - obj, + model, BaseEstimator, ), f"model must be a sklearn estimator. Got {type(model)}" - else: - obj = model - if library in sklearn_dict and "art." not in str(type(model)): - est = sklearn_dict[library] - try: - check_is_fitted(obj) - except NotFittedError: - try: - obj.fit(data[0], data[2]) - except np.AxisError as e: - logger.warning(e) - logger.warning( - "Error while fitting model. Attempting to reshape data", - ) - if len(np.squeeze(data[2]).shape) > 1: - nb_classes = np.squeeze(data[2]).shape[1] - else: - nb_classes = len(np.unique(data[2])) - y_train = to_categorical(data[2], nb_classes) - obj.fit(data[0], y_train) - except ValueError as e: - if "Found array with dim 3. 
Estimator expected <= 2." in str(e): - obj.fit(data[0].reshape(data[0].shape[0], -1), data[2]) - elif "y should be a 1d array, got an array of shape" in str(e): - obj.fit(data[0], np.argmax(data[2], axis=1)) - else: - raise e - model = est(obj, **kwargs) - elif "art." in str(type(model)): - model = obj - else: - raise ValueError(f"library must be one of {sklearn_models}. Got {library}") assert hasattr( model, "fit", diff --git a/deckard/base/model/torch_models.py b/deckard/base/model/torch_models.py index 9e532c96..748c6017 100644 --- a/deckard/base/model/torch_models.py +++ b/deckard/base/model/torch_models.py @@ -88,11 +88,11 @@ def __call__(self): kwargs.update(**kwargs.pop("kwargs", {})) data = self.data optimizer = TorchOptimizer( - **kwargs.pop("optimizer", {"name": "torch.optim.Adam"}) + **kwargs.pop("optimizer", {"name": "torch.optim.Adam"}), )(model) kwargs.update({"optimizer": optimizer}) criterion = TorchCriterion( - **kwargs.pop("criterion", {"name": "torch.nn.CrossEntropyLoss"}) + **kwargs.pop("criterion", {"name": "torch.nn.CrossEntropyLoss"}), )() kwargs.update({"loss": criterion}) if "input_shape" not in kwargs: diff --git a/deckard/base/scorer/scorer.py b/deckard/base/scorer/scorer.py index a4dcbfb9..52429e44 100644 --- a/deckard/base/scorer/scorer.py +++ b/deckard/base/scorer/scorer.py @@ -156,7 +156,11 @@ def load(self, filename): return scores def __call__( - self, *args, score_dict_file=None, labels_file=None, predictions_file=None + self, + *args, + score_dict_file=None, + labels_file=None, + predictions_file=None, ): new_scores = {} args = list(args) diff --git a/deckard/base/utils/factory.py b/deckard/base/utils/factory.py index e5fffd07..7116fded 100644 --- a/deckard/base/utils/factory.py +++ b/deckard/base/utils/factory.py @@ -8,7 +8,7 @@ def flatten_dict(dictionary: dict, separator: str = ".", prefix: str = ""): """ - Flattens a dictionary into a list of dictionarieswith keys separated by the separator. 
+ Flattens a dictionary into a list of dictionarie swith keys separated by the separator. :param dictionary: The dictionary to flatten. :param separator: The separator to use when flattening the dictionary. :param prefix: The prefix to use when flattening the dictionary. diff --git a/deckard/layers/afr.py b/deckard/layers/afr.py index e7e22bcf..8312d67a 100644 --- a/deckard/layers/afr.py +++ b/deckard/layers/afr.py @@ -30,11 +30,40 @@ def plot_aft( xlabel=None, ylabel=None, replacement_dict={}, - filetype=".pdf", + filetype=".eps", folder=".", + legend={}, **kwargs, ): file = Path(folder, file).with_suffix(filetype) + aft = fit_aft(df, event_col, duration_col, mtype, kwargs) + columns = list(df.columns) + columns.remove(event_col) + columns.remove(duration_col) + ax = aft.plot(columns=columns) + labels = ax.get_yticklabels() + labels = [label.get_text() for label in labels] + for k, v in replacement_dict.items(): + labels = [label.replace(k, v) for label in labels] + values = ax.get_yticks().tolist() + # sort labels by values + labels = [x for _, x in sorted(zip(values, labels))] + values = [x for x, _ in sorted(zip(values, labels))] + ax.set_yticks(values) + ax.set_yticklabels(labels, fontsize=12) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + ax.legend(**legend) + ax.get_figure().tight_layout() + ax.get_figure().savefig(file) + logger.info(f"Saved graph to {file}") + plt.show() + plt.gcf().clear() + return ax, aft + + +def fit_aft(df, event_col, duration_col, mtype, kwargs): if mtype == "weibull": aft = WeibullAFTFitter(**kwargs) elif mtype == "log_normal": @@ -51,22 +80,14 @@ def plot_aft( event_col in df.columns ), f"Column {event_col} not in dataframe with columns {df.columns}" plt.gcf().clear() - aft.fit(df, duration_col=duration_col, event_col=event_col) - ax = aft.plot() - labels = ax.get_yticklabels() - labels = [label.get_text() for label in labels] - for k, v in replacement_dict.items(): - labels = [label.replace(k, v) for label 
in labels] - ax.set_yticklabels(labels) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.set_title(title) - ax.get_figure().tight_layout() - ax.get_figure().savefig(file) - logger.info(f"Saved graph to {file}") - plt.show() - plt.gcf().clear() - return ax, aft + assert duration_col in df.columns, f"{duration_col} not in df.columns" + assert event_col in df.columns, f"{event_col} not in df.columns" + aft.fit( + df, + duration_col=duration_col, + event_col=event_col, + ) + return aft def plot_partial_effects( @@ -74,35 +95,40 @@ def plot_partial_effects( covariate_array, values_array, title=None, - file="partial_effects.pdf", + file="partial_effects.eps", xlabel="Covariate", ylabel="Failure rate", legend_kwargs={"loc": "upper left"}, replacement_dict={}, cmap="coolwarm", folder=".", - filetype=".pdf", + filetype=".eps", **kwargs, ): plt.gcf().clear() file = Path(folder, file).with_suffix(filetype) - pareto = aft.plot_partial_effects_on_outcome( - covariate_array, values_array, cmap=cmap, **kwargs + partial_effects = aft.plot_partial_effects_on_outcome( + covariate_array, + values_array, + cmap=cmap, + **kwargs, ) - labels = pareto.get_yticklabels() + labels = partial_effects.get_yticklabels() labels = [label.get_text() for label in labels] + values = partial_effects.get_yticks().tolist() for k, v in replacement_dict.items(): labels = [label.replace(k, v) for label in labels] - pareto.set_yticklabels(labels) - pareto.legend(**legend_kwargs) - pareto.set_ylabel(ylabel) - pareto.set_xlabel(xlabel) - pareto.set_title(title) - pareto.get_figure().tight_layout() - pareto.get_figure().savefig(file) + partial_effects.set_yticks(values) + partial_effects.set_yticklabels(labels) + partial_effects.legend(**legend_kwargs) + partial_effects.set_ylabel(ylabel) + partial_effects.set_xlabel(xlabel) + partial_effects.set_title(title) + partial_effects.get_figure().tight_layout() + partial_effects.get_figure().savefig(file) logger.info(f"Saved graph to {file}") plt.gcf().clear() - 
return pareto + return partial_effects def score_model(aft, train, test): @@ -113,15 +139,20 @@ def score_model(aft, train, test): return scores -def make_afr_table(score_list, aft_dict, dataset, X_train, folder="."): - pd.set_option("display.float_format", lambda x: "%.3f" % x) +def make_afr_table( + score_list, + aft_dict, + dataset, + X_train, + folder=".", + filename="aft_comparison", +): assert len(score_list) == len( aft_dict, ), "Length of score list and aft dict must be equal" folder = Path(folder) aft_data = pd.DataFrame() - aft_data.index.name = "Model" - aft_data.index = aft_dict.keys() + aft_data["AIC"] = [ x.AIC_ if not isinstance(x, CoxPHFitter) else np.nan for x in aft_dict.values() ] @@ -131,20 +162,31 @@ def make_afr_table(score_list, aft_dict, dataset, X_train, folder="."): ] # aft_data["Train LL"] = [x["train_score"] for x in score_list] # aft_data["Test LL"] = [x["test_score"] for x in score_list] - aft_data["Mean $S(t;\\theta)$"] = [ + aft_data[r"Mean $S(t;\theta)$"] = [ x.predict_expectation(X_train).mean() for x in aft_dict.values() ] - aft_data["Median $S(t;\\theta)$"] = [ + aft_data[r"Median $S(t;\theta)$"] = [ x.predict_median(X_train).median() for x in aft_dict.values() ] - aft_data.to_csv(folder / "aft_comparison.csv") + label = f"tab:{dataset}" + upper = dataset.upper() + aft_data.index.name = "Distribution" + aft_data.index = [str(k).replace("_", " ").capitalize() for k in aft_dict.keys()] + aft_data.to_csv(folder / "aft_comparison.csv", na_rep="--") logger.info(f"Saved AFT comparison to {folder / 'aft_comparison.csv'}") - aft_data.fillna("--", inplace=True) aft_data.to_latex( - folder / "aft_comparison.tex", + buf=folder / f"{filename}.tex", float_format="%.3g", - label=f"tab:{dataset}", - caption=f"Comparison of AFR Models on the {dataset.upper()} dataset.", + na_rep="--", + label=label, + index_names=True, + caption=f"Comparison of AFR Models on the {upper} dataset.", + escape=False, + ) + aft_data.to_csv( + Path(folder / 
f"{filename}.csv"), + index_label="Distribution", + na_rep="--", ) return aft_data @@ -153,7 +195,7 @@ def make_afr_table(score_list, aft_dict, dataset, X_train, folder="."): def clean_data_for_aft( data, covariate_list, - target="adv_failure_rate", + target="adv_accuracy", ): subset = data.copy() assert ( @@ -162,17 +204,22 @@ def clean_data_for_aft( logger.info(f"Shape of dirty data: {subset.shape}") cleaned = pd.DataFrame() covariate_list.append(target) + logger.info(f"Covariates : {covariate_list}") for kwarg in covariate_list: assert kwarg in subset.columns, f"{kwarg} not in data.columns" cleaned = pd.concat([cleaned, subset[kwarg]], axis=1) cols = cleaned.columns cleaned = pd.DataFrame(subset, columns=cols) + cleaned.index = subset.index + # remove rows with -1e10 or 1e10, which are placeholders for run-time errors depending on the direction of optimization for col in cols: cleaned = cleaned[cleaned[col] != -1e10] cleaned = cleaned[cleaned[col] != 1e10] - cleaned.dropna(inplace=True, how="any", axis=0) + # Convert categorical variables to C-1 dummy variables where C is the number of categories cleaned = pd.get_dummies(cleaned) + # de-duplicate index + cleaned = cleaned.loc[~cleaned.index.duplicated(keep="first")] assert ( target in cleaned ), f"Target {target} not in dataframe with columns {cleaned.columns}" @@ -204,6 +251,8 @@ def split_data_for_aft( ), f"Duration {duration_col} not in dataframe with columns {cleaned.columns}" X_train = X_train.dropna(axis=0, how="any") X_test = X_test.dropna(axis=0, how="any") + X_train = pd.DataFrame(X_train, columns=cleaned.columns) + X_test = pd.DataFrame(X_test, columns=cleaned.columns) return X_train, X_test @@ -226,7 +275,9 @@ def render_afr_plot(mtype, config, X_train, X_test, target, duration_col, folder score = score_model(aft, X_train, X_test) for partial_effect_dict in partial_effect_list: partial_effect_plot = plot_partial_effects( - aft=aft, **partial_effect_dict, folder=folder + aft=aft, + 
**partial_effect_dict, + folder=folder, ) plots.append(partial_effect_plot) return aft, plots, score @@ -239,6 +290,7 @@ def render_all_afr_plots( data, dataset, test_size=0.8, + filename="aft_comparison", folder=".", ): covariate_list = config.pop("covariates", []) @@ -266,7 +318,14 @@ def render_all_afr_plots( folder=folder, ) score_list = list(scores.values()) - aft_data = make_afr_table(score_list, models, dataset, X_train, folder=folder) + aft_data = make_afr_table( + score_list, + models, + dataset, + X_train, + folder=folder, + filename=filename, + ) print("*" * 80) print("*" * 34 + " RESULTS " + "*" * 34) print("*" * 80) @@ -274,6 +333,25 @@ def render_all_afr_plots( print("*" * 80) +def set_matplotlib_vars(matplotlib_dict=None): + if matplotlib_dict is None: + matplotlib_dict = { + "font": { + "family": "Times New Roman", + "weight": "bold", + "size": 22, + }, + } + matplotlib.rc(**matplotlib_dict) + + +def fillna(data, config): + fillna = config.pop("fillna", {}) + for k, v in fillna.items(): + assert k in data.columns, f"{k} not in data" + data[k] = data[k].fillna(v) + + if "__main__" == __name__: afr_parser = argparse.ArgumentParser() afr_parser.add_argument("--target", type=str, default="adv_failures") @@ -282,46 +360,62 @@ def render_all_afr_plots( afr_parser.add_argument("--data_file", type=str, default="data.csv") afr_parser.add_argument("--config_file", type=str, default="afr.yaml") afr_parser.add_argument("--plots_folder", type=str, default="plots") + afr_parser.add_argument("--summary_file", type=str, default="aft_comparison") args = afr_parser.parse_args() target = args.target duration_col = args.duration_col dataset = args.dataset logging.basicConfig(level=logging.INFO) - font = { - "family": "Times New Roman", - "weight": "bold", - "size": 22, - } - - matplotlib.rc("font", **font) + set_matplotlib_vars() + # Filesystem stuff csv_file = args.data_file FOLDER = args.plots_folder + filename = ( + Path(args.summary_file).as_posix() if 
args.summary_file is not None else None + ) + assert Path(args.config_file).exists(), f"{args.config_file} does not exist." Path(FOLDER).mkdir(exist_ok=True, parents=True) + assert Path(FOLDER).exists(), f"{FOLDER} does not exist." + assert Path(csv_file).exists(), f"{csv_file} does not exist." + + # Reading compiled csv file data = pd.read_csv(csv_file, index_col=0) logger.info(f"Shape of data: {data.shape}") data.columns = data.columns.str.strip() with Path(args.config_file).open("r") as f: config = yaml.safe_load(f) - fillna = config.pop("fillna", {}) - for k, v in fillna.items(): - assert k in data.columns, f"{k} not in data" - data[k] = data[k].fillna(v) + fillna(data, config) + + # Strip whitespace from strings data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x) - assert Path(args.config_file).exists(), f"{args.config_file} does not exist." + + # Check if covariates are specified covariates = config.get("covariates", []) assert len(covariates) > 0, "No covariates specified in config file" + + # Cannot fit AFT models with missing values logger.info(f"Shape of data before data before dropping na: {data.shape}") data = drop_frames_without_results(data, covariates) logger.info(f"Shape of data before data before dropping na: {data.shape}") - data.loc[:, "adv_failures"] = (1 - data.loc[:, "adv_accuracy"]) * data.loc[ - :, - "attack.attack_size", - ] - data.loc[:, "ben_failures"] = (1 - data.loc[:, "accuracy"]) * data.loc[ - :, - "attack.attack_size", - ] + # Converting accuracy to unnormalized count, if needed + if "adv_failures" in covariates and "adv_failures" in data.columns: + logger.info("Adding adv_failures to data") + assert "adv_accuracy" in data.columns, "adv_accuracy not in data" + assert "attack.attack_size" in data.columns, "attack.attack_size not in data" + data.loc[:, "adv_failures"] = (1 - data.loc[:, "adv_accuracy"]) * data.loc[ + :, + "attack.attack_size", + ] + if "ben_failures" in covariates: + logger.info("Adding ben_failures 
to data") + assert "accuracy" in data.columns, "accuracy not in data" + assert "attack.attack_size" in data.columns, "attack.attack_size not in data" + data.loc[:, "ben_failures"] = (1 - data.loc[:, "accuracy"]) * data.loc[ + :, + "data.sample.test_size", + ] + # Plotting AFT models render_all_afr_plots( config, duration_col, @@ -330,4 +424,5 @@ def render_all_afr_plots( dataset, test_size=0.8, folder=FOLDER, + filename=filename, ) diff --git a/deckard/layers/attack.py b/deckard/layers/attack.py new file mode 100644 index 00000000..ebe34146 --- /dev/null +++ b/deckard/layers/attack.py @@ -0,0 +1,53 @@ +#! /usr/bin/env python + +import logging +from pathlib import Path +import argparse + + +from .utils import save_params_file, run_stages + +logger = logging.getLogger(__name__) + +attack_parser = argparse.ArgumentParser() +attack_parser.add_argument("stage", type=str, nargs="*", default=None) +attack_parser.add_argument("--verbosity", type=str, default="INFO") +attack_parser.add_argument("--params_file", type=str, default="params.yaml") +attack_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml") +attack_parser.add_argument("--config_dir", type=str, default="conf") +attack_parser.add_argument("--config_file", type=str, default="default") +attack_parser.add_argument("--workdir", type=str, default=".") +attack_parser.add_argument("--overrides", nargs="*", default=[], type=str) + + +def attack_main(args): + config_dir = Path(args.workdir, args.config_dir).absolute().as_posix() + logging.basicConfig( + level=args.verbosity, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + if args.overrides is not None and len(args.overrides) > 0: + save_params_file( + config_dir=config_dir, + config_file=args.config_file, + params_file=args.params_file, + overrides=args.overrides, + ) + logger.info( + f"Using existing params file {args.params_file} in directory {args.workdir}", + ) + results = run_stages( + stages=args.stage, + 
pipeline_file=args.pipeline_file, + params_file=args.params_file, + repo=args.workdir, + config_dir=config_dir, + config_file=args.config_file, + sub_dict="attack", + ) + return results + + +if __name__ == "__main__": + args = attack_parser.parse_args() + attack_main(args) diff --git a/deckard/layers/clean_data.py b/deckard/layers/clean_data.py index ab7aee73..609ce48c 100644 --- a/deckard/layers/clean_data.py +++ b/deckard/layers/clean_data.py @@ -5,7 +5,6 @@ import pandas as pd import seaborn as sns import yaml -from math import isnan import numpy as np from tqdm import tqdm @@ -63,34 +62,47 @@ def calculate_failure_rate(data): data = data[data.columns.drop(list(data.filter(regex=r"\.1$")))] data.columns.str.replace(" ", "") assert "accuracy" in data.columns, "accuracy not in data.columns" + data.loc[:, "accuracy"] = pd.to_numeric(data.loc[:, "accuracy"]) assert ( "attack.attack_size" in data.columns ), "attack.attack_size not in data.columns" + data.loc[:, "attack.attack_size"] = pd.to_numeric( + data.loc[:, "attack.attack_size"], + ) assert ( "predict_time" in data.columns or "predict_proba_time" in data.columns ), "predict_time or predict_proba_time not in data.columns" assert "adv_accuracy" in data.columns, "adv_accuracy not in data.columns" + data.loc[:, "adv_accuracy"] = pd.to_numeric(data.loc[:, "adv_accuracy"]) assert "adv_fit_time" in data.columns, "adv_fit_time not in data.columns" + data.loc[:, "adv_fit_time"] = pd.to_numeric(data.loc[:, "adv_fit_time"]) assert "train_time" in data.columns, "train_time not in data.columns" + data.loc[:, "train_time"] = pd.to_numeric(data.loc[:, "train_time"]) if "predict_time" in data.columns: + data.loc[:, "predict_time"] = pd.to_numeric(data.loc[:, "predict_time"]) failure_rate = ( - 1 - data.loc[:, "accuracy"] * data.loc[:, "attack.attack_size"] + (1 - data.loc[:, "accuracy"]) * data.loc[:, "attack.attack_size"] ) / data.loc[:, "predict_time"] elif "predict_proba_time" in data.columns: + data.loc[:, 
"predict_proba_time"] = pd.to_numeric( + data.loc[:, "predict_proba_time"], + ) failure_rate = ( - 1 - data.loc[:, "accuracy"] * data.loc[:, "attack.attack_size"] + (1 - data.loc[:, "accuracy"]) * data.loc[:, "attack.attack_size"] ) / data.loc[:, "predict_proba_time"] else: raise ValueError("predict_time or predict_proba_time not in data.columns") adv_failure_rate = ( - 1 - data.loc[:, "adv_accuracy"] * data.loc[:, "attack.attack_size"] - ) / data.loc[:, "predict_time"] + (1 - data.loc[:, "adv_accuracy"]) + * data.loc[:, "attack.attack_size"] + / data.loc[:, "predict_time"] + ) data = data.assign(adv_failure_rate=adv_failure_rate) data = data.assign(failure_rate=failure_rate) training_time_per_failure = data.loc[:, "train_time"] / data.loc[:, "failure_rate"] training_time_per_adv_failure = ( - data.loc[:, "train_time_per_sample"] * data.loc[:, "adv_failure_rate"] + data.loc[:, "train_time"] * data.loc[:, "adv_failure_rate"] ) data = data.assign(training_time_per_failure=training_time_per_failure) data = data.assign(training_time_per_adv_failure=training_time_per_adv_failure) @@ -172,17 +184,24 @@ def min_max_scaling(data, *args): # Min-max scaling of control parameters for def_ in defences: max_ = data[data.def_gen == def_].def_value.max() + max_ = pd.to_numeric(max_, errors="raise") min_ = data[data.def_gen == def_].def_value.min() + min_ = pd.to_numeric(min_, errors="raise") scaled_value = (data[data.def_gen == def_].def_value - min_) / (max_ - min_) data.loc[data.def_gen == def_, "def_value"] = scaled_value for atk in attacks: max_ = data[data.atk_gen == atk].atk_value.max() + max_ = pd.to_numeric(max_, errors="raise") min_ = data[data.atk_gen == atk].atk_value.min() + min_ = pd.to_numeric(min_, errors="raise") scaled_value = (data[data.atk_gen == atk].atk_value - min_) / (max_ - min_) data.loc[data.atk_gen == atk, "atk_value"] = scaled_value for k in args: max_ = data[k].max() + max_ = pd.to_numeric(max_, errors="raise") min_ = data[k].min() + min_ = 
pd.to_numeric(min_, errors="raise") + data[k] = pd.to_numeric(data[k], errors="raise") scaled_value = (data[k] - min_) / (max_ - min_) data[k] = scaled_value return data @@ -200,13 +219,6 @@ def merge_defences( "model.art.transformer.name", "model.art.trainer.name", ], - control_variable=["device_id"], - defaults={ - # "model.trainer.nb_epoch": 20, - # "model.trainer.kwargs.nb_epoch": 20, - # "model.trainer.batch_size" : 1024, - # "model.trainer.kwargs.batch_size" : 1024, - }, ): """ The function `merge_defences` merges different defence columns in a DataFrame and assigns a unique @@ -228,8 +240,6 @@ def merge_defences( """ defences = [] def_gens = [] - for control in control_variable: - assert control in results, f"{control} not in results.columns" for _, entry in tqdm(results.iterrows(), desc="Merging defences"): defence = [] i = 0 @@ -239,21 +249,6 @@ def merge_defences( else: pass i += 1 - for k, v in defaults.items(): - if ( - k in entry - and v != entry[k] - and not isnan(pd.to_numeric(entry[k])) - and len(defence) == 0 - ): - defence.append(k) - else: - pass - for col in control_variable: - if col in entry and entry[col] not in nones and len(defence) == 0: - defence.append(col) - else: - pass ############################################################################################################ if len(defence) > 1: def_gen = [str(x).split(".")[-1] for x in defence] @@ -271,6 +266,7 @@ def merge_defences( results["defence_name"] = defences results["def_gen"] = def_gens logger.info(f"Unique defences after merging: {set(results.def_gen)}") + logger.info(f"Unique set of full names after merge: {set(results.defence_name)}") assert hasattr(results, "def_gen"), "def_gen not in results.columns" return results @@ -297,8 +293,9 @@ def merge_attacks(results: pd.DataFrame): attack = None attacks.append(attack) if attacks != [None] * len(attacks): - results["attack_name"] = attacks - results["atk_gen"] = [str(x).split(".")[-1] for x in attacks] + results = 
results.assign(attack_name=attacks) + attacks = [str(x).split(".")[-1] for x in attacks] + results = results.assign(atk_gen=attacks) logger.info(f"Unique attacks: {set(results.atk_gen)}") else: logger.warning("No attacks found in data. Check your config file.") @@ -342,7 +339,7 @@ def format_control_parameter(data, control_dict, fillna): logger.info("Fillna: ") logger.info(yaml.dump(fillna)) for defence in defences: - if defence in control_dict and defence != "Epochs": + if defence in control_dict: # Get parameter name from control_dict param = control_dict[defence] # Shorten parameter name @@ -375,6 +372,7 @@ def format_control_parameter(data, control_dict, fillna): else value ) data.loc[data.def_gen == defence, "def_value"] = value + del fillna[defence] else: logger.warning(f"Defence {defence} not in control_dict. Deleting rows.") data = data[data.def_gen != defence] @@ -414,17 +412,38 @@ def format_control_parameter(data, control_dict, fillna): else value ) data.loc[data.atk_gen == attack, "def_value"] = value + del fillna[attack] else: logger.warning(f"Attack {attack} not in control_dict. 
Deleting rows.") data = data[data.atk_gen != attack] - defences = list(data.def_gen.unique()) - attacks = list(data.atk_gen.unique()) + defences = list(data.def_gen.unique()) if "def_gen" in data.columns else [] + attacks = list(data.atk_gen.unique()) if "atk_gen" in data.columns else [] logger.info(f"Unique defences: {defences}") logger.info(f"Unique attacks: {attacks}") - assert "def_param" in data.columns, "def_param not in data.columns" - assert "atk_param" in data.columns, "atk_param not in data.columns" - assert "def_value" in data.columns, "def_value not in data.columns" - assert "atk_value" in data.columns, "atk_value not in data.columns" + if len(defences) > 0: + assert "def_param" in data.columns, "def_param not in data.columns" + assert "def_value" in data.columns, "def_value not in data.columns" + if len(attacks) > 0: + assert "atk_param" in data.columns, "atk_param not in data.columns" + assert "atk_value" in data.columns, "atk_value not in data.columns" + return data, fillna + + +def replace_strings_in_data(data, replace_dict): + for k, v in replace_dict.items(): + logger.info(f"Replacing strings in {k}...") + assert isinstance( + v, + dict, + ), f"Value for key {k} in replace_dict is not a dictionary." + assert k in data.columns, f"Key {k} not in data.columns." 
+ for k1, v1 in v.items(): + logger.info(f"Replacing {k1} with {v1} in {k}...") + k1 = str(k1) + v1 = str(v1) + data[k] = data[k].astype(str) + data.loc[:, k] = data.loc[:, k].str.replace(k1, v1) + logger.info(f"Unique values after replacement: {data[k].unique()}") return data @@ -434,6 +453,8 @@ def clean_data_for_plotting( atk_gen_dict, control_dict, fillna, + replace_dict, + pareto_dict, ): """ The function `clean_data_for_plotting` cleans and formats data for plotting by dropping empty rows, @@ -465,99 +486,115 @@ def clean_data_for_plotting( data = data.loc[:, ~data.columns.str.endswith(".1")] logger.info(f"Shape after dropping poorly merged columns: {data.shape}") logger.info("Shortening model names...") - # Removes the path and to the model object and leaves the name of the model - model_names = data["model.init.name"].str.split(".").str[-1] - data["model_name"] = model_names # If "Net" is in the model name, we assume the following string denotes the layers as in ResNet18 if hasattr(data, "model.init.name"): model_names = data["model.init.name"].str.split(".").str[-1] - data.loc[:, "model_name"] = model_names + data = data.assign(model_name=model_names) model_layers = [str(x).split("Net")[-1] for x in model_names] - data.loc[:, "model_layers"] = model_layers + data = data.assign(model_layers=model_layers) logger.info(f"Model Names: {data.model_name.unique()}") logger.info(f"Model Layers: {data.model_layers.unique()}") - data["nb_epoch"] = ( - data["model.trainer.kwargs.nb_epoch"] - if "model.trainer.kwargs.nb_epoch" in data.columns - else data["model.trainer.nb_epoch"] - ) + logger.info("Replacing data.sample.random_state with random_state...") data["data.sample.random_state"].rename("random_state", inplace=True) - data = merge_defences(data) + if len(def_gen_dict) > 0: + data = merge_defences(data) logger.info("Replacing attack and defence names with short names...") if hasattr(data, "def_gen"): def_gen = data.def_gen.map(def_gen_dict) data.def_gen = 
def_gen data.dropna(axis=0, subset=["def_gen"], inplace=True) - data = merge_attacks(data) + if "attack.init.name" in data: + data = merge_attacks(data) if hasattr(data, "atk_gen"): atk_gen = data.atk_gen.map(atk_gen_dict) data.atk_gen = atk_gen data.dropna(axis=0, subset=["atk_gen"], inplace=True) - data = format_control_parameter(data, control_dict, fillna) + data, fillna = format_control_parameter(data, control_dict, fillna) + for k, v in fillna.items(): + if k in data.columns: + data[k] = data[k].fillna(v) + else: + data[k] = str(v) + data = replace_strings_in_data(data, replace_dict) + if len(pareto_dict) > 0: + data = pareto_set(data, pareto_dict) return data -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-i", - "--input_file", - type=str, - help="Data file to read from", - required=True, - ) - parser.add_argument( - "-o", - "--output_file", - type=str, - help="Data file to read from", - required=True, - ) - parser.add_argument( - "-v", - "--verbosity", - default="INFO", - help="Increase output verbosity", - ) - parser.add_argument( - "-c", - "--config", - help="Path to the config file", - default="clean.yaml", - ) - parser.add_argument( - "-s", - "--subset", - help="Subset of data you would like to plot", - default=None, - nargs="?", - ) - parser.add_argument( - "-d", - "--drop_if_empty", - help="Drop row if this columns is empty", - nargs="+", - type=str, - default=[ - "accuracy", - "adv_accuracy", - "train_time", - "adv_fit_time", - "predict_proba_time", - ], - ) - parser.add_argument( - "--pareto_dict", - help="Path to (optional) pareto set dictionary.", - default=None, - ) - args = parser.parse_args() +def drop_values(data, drop_dict): + for k, v in drop_dict.items(): + data = data[data[k] != v] + return data + + +parser = argparse.ArgumentParser() +parser.add_argument( + "-i", + "--input_file", + type=str, + help="Data file to read from", + required=True, +) +parser.add_argument( + "-o", + "--output_file", + 
type=str, + help="Data file to read from", + required=True, +) +parser.add_argument( + "-v", + "--verbosity", + default="INFO", + help="Increase output verbosity", +) +parser.add_argument( + "-c", + "--config", + help="Path to the config file", + default="clean.yaml", +) +parser.add_argument( + "-s", + "--subset", + help="Subset of data you would like to plot", + default=None, + nargs="?", +) +parser.add_argument( + "-d", + "--drop_if_empty", + help="Drop row if this columns is empty", + nargs="+", + type=str, + default=[ + "accuracy", + "train_time", + "predict_time", + ], +) +parser.add_argument( + "--pareto_dict", + help="Path to (optional) pareto set dictionary.", + default=None, +) + + +def main(args): logging.basicConfig(level=args.verbosity) assert Path( args.input_file, ).exists(), f"File {args.input_file} does not exist. Please specify a valid file using the -i flag." data = pd.read_csv(args.input_file) + # Strip whitespace from column names + trim_strings = lambda x: x.strip() if isinstance(x, str) else x # noqa E731 + data.rename(columns=trim_strings, inplace=True) + # Strip whitespace from column values + data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x) + + assert "model.init.name" in data.columns, "model.init.name not in data.columns" + if isinstance(args.drop_if_empty, str): args.drop_if_empty = args.drop_if_empty.split(",") else: @@ -565,22 +602,6 @@ def clean_data_for_plotting( for col in args.drop_if_empty: assert col in data.columns, f"Column {col} not in data.columns" data = drop_frames_without_results(data, subset=args.drop_if_empty) - if args.pareto_dict is None: - sense_dict = {} - else: - if Path(args.pareto_dict).exists(): - with open(args.pareto_dict, "r") as f: - sense_dict = yaml.safe_load(f) - elif ( - isinstance(args.pareto_dict.split(":")[:-2], str) - and Path(args.pareto_dict.split(":")[:-2]).exists() - ): - with open(Path(args.pareto_dict.split(":")[:-2]), "r") as f: - sense_dict = 
yaml.safe_load(f)[args.pareto_dict.split(":")[:-1]] - else: - raise ValueError( - f"Pareto_dictionary, {args.pareto_dict} does not exist as a file or file and dictionary using file:dictionary notation.", - ) # Reads Config file with open(Path(args.config), "r") as f: big_dict = yaml.load(f, Loader=yaml.FullLoader) @@ -588,16 +609,23 @@ def clean_data_for_plotting( atk_gen_dict = big_dict.get("attacks", {}) control_dict = big_dict.get("params", {}) fillna = big_dict.get("fillna", {}) - min_max = big_dict.get("min_max", ["nb_epoch"]) - + min_max = big_dict.get("min_max", []) + replace_dict = big_dict.get("replace", {}) + pareto_dict = big_dict.get("pareto", {}) + drop_dict = big_dict.pop("drop_values", {}) + data = drop_values(data, drop_dict) results = clean_data_for_plotting( data, def_gen_dict, atk_gen_dict, control_dict, fillna=fillna, + replace_dict=replace_dict, + pareto_dict=pareto_dict, ) - results = calculate_failure_rate(results) + + if "adv_accuracy" in results.columns: + results = calculate_failure_rate(results) results = min_max_scaling(results, *min_max) output_file = save_results( @@ -609,3 +637,8 @@ def clean_data_for_plotting( output_file, ).exists(), f"File {output_file} does not exist. Please specify a valid file using the -o flag." 
logger.info(f"Saved results to {output_file}") + + +if __name__ == "__main__": + args = parser.parse_args() + main(args) diff --git a/deckard/layers/compile.py b/deckard/layers/compile.py index da7c4f8b..b9059389 100644 --- a/deckard/layers/compile.py +++ b/deckard/layers/compile.py @@ -20,7 +20,7 @@ def flatten_results(df: pd.DataFrame) -> pd.DataFrame: df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) for col in tqdm(df.columns, desc="Flattening columns"): if isinstance(df[col][0], dict): - tmp = pd.json_normalize(df[col]) + tmp = pd.json_normalize(df[col].fillna({i: {} for i in df[col].index})) tmp.columns = [f"{col}.{subcol}" for subcol in tmp.columns] tmp.index = df.index df = pd.merge(df, tmp, left_index=True, how="outer", right_index=True) @@ -79,14 +79,31 @@ def read_file(file, results): if folder not in results: results[folder] = {} if suffix == ".json": - with open(file, "r") as f: - try: + try: + retries = locals().get("retries", 0) + with open(file, "r") as f: dict_ = json.load(f) - except json.decoder.JSONDecodeError as e: + except json.decoder.JSONDecodeError as e: + logger.error(f"Error reading {file}") + print(f"Error reading {file}. Please fix the file and press Enter.") + input( + "Press Enter to continue. 
The next failure on this file will raise an error.", + ) + if retries > 1: raise e + else: + with open(file, "r") as f: + dict_ = json.load(f) + retries += 1 elif suffix == ".yaml": with open(file, "r") as f: - dict_ = yaml.safe_load(f) + try: + dict_ = yaml.safe_load(f) + except Exception as e: + logger.error(f"Error reading {file}") + print(f"Error reading {file}") + input("Press Enter to raise the error.") + raise e else: raise ValueError(f"File type {suffix} not supported.") results[folder]["stage"] = stage @@ -106,16 +123,16 @@ def save_results(results, results_file, results_folder) -> str: """ results_file = Path(results_folder, results_file) logger.info(f"Saving data to {results_file}") - Path(results_folder).mkdir(exist_ok=True, parents=True) + Path(results_file).parent.mkdir(exist_ok=True, parents=True) suffix = results_file.suffix if suffix == ".csv": - results.to_csv(results_file) + results.to_csv(results_file, index=True) elif suffix == ".xlsx": - results.to_excel(results_file) + results.to_excel(results_file, index=True) elif suffix == ".html": - results.to_html(results_file) + results.to_html(results_file, index=True) elif suffix == ".json": - results.to_json(results_file) + results.to_json(results_file, index=True, orient="records") else: raise ValueError(f"File type {suffix} not supported.") assert Path( @@ -155,7 +172,6 @@ def load_results(results_file, results_folder) -> pd.DataFrame: parser.add_argument("--results_file", type=str, default="results.csv") parser.add_argument("--report_folder", type=str, default="reports", required=True) parser.add_argument("--results_folder", type=str, default=".") - parser.add_argument("--config", type=str, default="conf/compile.yaml") parser.add_argument("--exclude", type=list, default=None, nargs="*") parser.add_argument("--verbose", type=str, default="INFO") args = parser.parse_args() diff --git a/deckard/layers/data.py b/deckard/layers/data.py new file mode 100644 index 00000000..95768bbc --- /dev/null +++ 
b/deckard/layers/data.py @@ -0,0 +1,53 @@ +#! /usr/bin/env python + +import logging +from pathlib import Path +import argparse + + +from .utils import save_params_file, run_stages + +logger = logging.getLogger(__name__) + +data_parser = argparse.ArgumentParser() +data_parser.add_argument("stage", type=str, nargs="*", default=None) +data_parser.add_argument("--verbosity", type=str, default="INFO") +data_parser.add_argument("--params_file", type=str, default="params.yaml") +data_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml") +data_parser.add_argument("--config_dir", type=str, default="conf") +data_parser.add_argument("--config_file", type=str, default="default") +data_parser.add_argument("--workdir", type=str, default=".") +data_parser.add_argument("--overrides", nargs="*", default=[], type=str) + + +def data_main(args): + config_dir = Path(args.workdir, args.config_dir).absolute().as_posix() + logging.basicConfig( + level=args.verbosity, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + if args.overrides is not None and len(args.overrides) > 0: + save_params_file( + config_dir=config_dir, + config_file=args.config_file, + params_file=args.params_file, + overrides=args.overrides, + ) + logger.info( + f"Using existing params file {args.params_file} in directory {args.workdir}", + ) + results = run_stages( + stages=args.stage, + pipeline_file=args.pipeline_file, + params_file=args.params_file, + repo=args.workdir, + config_dir=config_dir, + config_file=args.config_file, + sub_dict="data", + ) + return results + + +if __name__ == "__main__": + args = data_parser.parse_args() + data_main(args) diff --git a/deckard/layers/experiment.py b/deckard/layers/experiment.py index 14d9ba43..b4336cb8 100644 --- a/deckard/layers/experiment.py +++ b/deckard/layers/experiment.py @@ -1,133 +1,52 @@ +#! 
/usr/bin/env python + import logging from pathlib import Path -import dvc.api -from hydra.utils import instantiate - -from dulwich.errors import NotGitRepository -import yaml import argparse -from copy import deepcopy -from ..base.utils import unflatten_dict - -logger = logging.getLogger(__name__) - -__all__ = [ - "get_dvc_stage_params", - "run_stage", - "get_stages", - "run_stages", -] - - -def get_dvc_stage_params( - stage, - params_file="params.yaml", - pipeline_file="dvc.yaml", - directory=".", - name=None, -): - logger.info( - f"Getting params for stage {stage} from {params_file} and {pipeline_file} in {directory}.", - ) - params = dvc.api.params_show(stages=stage) - params.update({"_target_": "deckard.base.experiment.Experiment"}) - files = dvc.api.params_show(pipeline_file, stages=stage, repo=directory) - unflattened_files = unflatten_dict(files) - params["files"] = dict(unflattened_files.get("files", unflattened_files)) - params["files"]["_target_"] = "deckard.base.files.FileConfig" - params["files"]["stage"] = stage - params["stage"] = stage - if name is not None: - params["files"]["name"] = name - return params -def run_stage( - params_file="params.yaml", - pipeline_file="dvc.yaml", - directory=".", - stage=None, -): - logger.info( - f"Running stage {stage} with params_file: {params_file} and pipeline_file: {pipeline_file} in directory {directory}", - ) - params = get_dvc_stage_params( - stage=stage, - params_file=params_file, - pipeline_file=pipeline_file, - directory=directory, - ) - exp = instantiate(params) - id_ = exp.name - files = deepcopy(exp.files()) - params_file = Path(files["score_dict_file"]).with_name("params.yaml").as_posix() - Path(params_file).parent.mkdir(exist_ok=True, parents=True) - with Path(params_file).open("w") as f: - yaml.dump(params, f) - score = exp() - return id_, score - +from .utils import save_params_file, run_stages -def get_stages(pipeline_file="dvc.yaml", stages=None, repo=None): - try: - def_stages = list( - 
dvc.api.params_show(pipeline_file, repo=repo)["stages"].keys(), - ) - except NotGitRepository: - raise ValueError( - f"Directory {repo} is not a git repository. Please run `dvc init` in {repo} and try again.", - ) - if stages is None or stages == []: - raise ValueError(f"Please specify one or more stage(s) from {def_stages}") - elif isinstance(stages, str): - stages = [stages] - else: - assert isinstance(stages, list), f"args.stage is of type {type(stages)}" - for stage in stages: - assert ( - stage in def_stages - ), f"Stage {stage} not found in {pipeline_file}. Available stages: {def_stages}" - return stages +logger = logging.getLogger(__name__) +experiment_parser = argparse.ArgumentParser() +experiment_parser.add_argument("stage", type=str, nargs="*", default=None) +experiment_parser.add_argument("--verbosity", type=str, default="INFO") +experiment_parser.add_argument("--params_file", type=str, default="params.yaml") +experiment_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml") +experiment_parser.add_argument("--config_dir", type=str, default="conf") +experiment_parser.add_argument("--config_file", type=str, default="default") +experiment_parser.add_argument("--workdir", type=str, default=".") +experiment_parser.add_argument("--overrides", nargs="*", default=[], type=str) -def run_stages(stages, pipeline_file="dvc.yaml", params_file="params.yaml", repo=None): - results = {} - stages = get_stages(stages=stages, pipeline_file=pipeline_file, repo=repo) - for stage in stages: - id_, score = run_stage( - stage=stage, - pipeline_file=pipeline_file, - params_file=params_file, - directory=repo, - ) - results[id_] = score - return results - -if __name__ == "__main__": - logger = logging.getLogger(__name__) - dvc_parser = argparse.ArgumentParser() - dvc_parser.add_argument("stage", type=str, nargs="*", default=None) - dvc_parser.add_argument("--verbosity", type=str, default="INFO") - dvc_parser.add_argument("--params_file", type=str, 
default="params.yaml") - dvc_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml") - dvc_parser.add_argument("--config_dir", type=str, default="conf") - dvc_parser.add_argument("--config_file", type=str, default="default") - dvc_parser.add_argument("--workdir", type=str, default=".") - args = dvc_parser.parse_args() - config_dir = Path(args.workdir, args.config_dir).resolve().as_posix() - # save_params_file( - # config_dir=config_dir, - # config_file=args.config_file, - # params_file=args.params_file, - # ) +def experiment_main(args): + config_dir = Path(args.workdir, args.config_dir).absolute().as_posix() logging.basicConfig( level=args.verbosity, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) + if args.overrides is not None and len(args.overrides) > 0: + save_params_file( + config_dir=config_dir, + config_file=args.config_file, + params_file=args.params_file, + overrides=args.overrides, + ) + logger.info( + f"Using existing params file {args.params_file} in directory {args.workdir}", + ) results = run_stages( stages=args.stage, pipeline_file=args.pipeline_file, params_file=args.params_file, repo=args.workdir, + config_dir=config_dir, + config_file=args.config_file, ) + return results + + +if __name__ == "__main__": + args = experiment_parser.parse_args() + experiment_main(args) diff --git a/deckard/layers/find_best.py b/deckard/layers/find_best.py index 0461f28e..eb582d51 100644 --- a/deckard/layers/find_best.py +++ b/deckard/layers/find_best.py @@ -3,6 +3,7 @@ from pathlib import Path from hydra import initialize_config_dir, compose from omegaconf import OmegaConf +import argparse import yaml from ..base.utils import flatten_dict @@ -19,7 +20,7 @@ def find_optuna_best( config_folder=Path(Path(), "conf"), default_config="default.yaml", config_subdir=None, - direction=None, + direction="maximize", ): logger.info(f"Study name: {study_name}") logger.info(f"Storage name: {storage_name}") @@ -31,59 +32,115 @@ def find_optuna_best( ) df 
= study.trials_dataframe(attrs=("number", "value", "params", "state")) if study_csv is not None: + Path(study_csv).parent.mkdir(parents=True, exist_ok=True) df.to_csv(study_csv) + # To dotlist best_params = flatten_dict(study.best_params) more_params = flatten_dict(study.best_trial.user_attrs) even_more_params = flatten_dict(study.best_trial.system_attrs) logger.debug(f"Best params: {best_params}") logger.debug(f"Best user params: {more_params}") logger.debug(f"Best system params: {even_more_params}") - best_params = {**more_params, **best_params} + # Merge all the params + best_params = OmegaConf.to_container( + OmegaConf.merge(best_params, more_params, even_more_params), + resolve=False, + ) + # to dotlist + best_params = flatten_dict(best_params) overrides = [] + # Changing the keys to hydra override format for key, value in best_params.items(): - logger.info(f"Overriding {key} with {value}") - overrides.append(f"{key}={value}") - with initialize_config_dir(config_dir=config_folder, version_base="1.3"): - cfg = compose(config_name=default_config, overrides=overrides) - cfg = OmegaConf.to_container(cfg, resolve=False) - if params_file is not None: - if params_file is True: - if config_subdir is not None: - params_file = Path( - config_folder, - f"{config_subdir}", - f"{default_config}.yaml", - ) - params = cfg.get(config_subdir) - else: - params_file = Path(config_folder, f"{default_config}.yaml") - params = cfg + if ( + key.startswith("++") or key.startswith("~~") or key.startswith("--") + ): # reserved meaning + pass + elif key.startswith("+"): # appends to config + key = "++" + key[1:] # force override else: - if config_subdir is not None: - params_file = Path( - config_folder, - f"{config_subdir}", - f"{params_file}.yaml", - ) - params = cfg.get(config_subdir) + key = "++" + key # force override + if config_subdir is None: + overrides.append(f"{key}={value}") + else: # if we are using a subdir, we need to remove the directory from the key + if ( + 
key.startswith(f"++{config_subdir}.") + or key.startswith(f"~~{config_subdir}.") + or key.startswith(f"--{config_subdir}.") + ): + key = key.replace(f"{config_subdir}.", "") + overrides.append(f"{key}={value}") + logger.info(f"Adding {key} to param list") else: - params = cfg - params_file = Path(config_folder, f"{params_file}.yaml") - params_file.parent.mkdir(parents=True, exist_ok=True) - with open(params_file.with_suffix(".yaml"), "w") as f: - yaml.dump(params, f) - assert ( - params_file.exists() - ), f"{params_file.resolve().as_posix()} does not exist." + logger.debug(f"Skipping {key} because it is not in {config_subdir}") + params = override_default_with_best( + config_folder, + default_config, + overrides, + config_subdir=config_subdir, + ) + if params_file is not None: + params_file = create_new_config_in_subdir( + params_file, + config_folder, + default_config, + config_subdir, + params, + ) return params -if __name__ == "__main__": - import argparse +def create_new_config_in_subdir( + params_file, + config_folder, + default_config, + config_subdir, + params, +): + if params_file is True: + if config_subdir is not None: + params_file = Path( + config_folder, + f"{config_subdir}", + f"{default_config}.yaml", + ) + else: + params_file = Path(config_folder, f"{default_config}.yaml") + else: + if config_subdir is not None: + params_file = Path( + config_folder, + f"{config_subdir}", + f"{params_file}.yaml", + ) + else: + params_file = Path(config_folder, f"{params_file}.yaml") + params_file.parent.mkdir(parents=True, exist_ok=True) + with open(params_file.with_suffix(".yaml"), "w") as f: + yaml.dump(params, f) + assert params_file.exists(), f"{params_file.resolve().as_posix()} does not exist." 
+ + return params_file + +def override_default_with_best( + config_folder, + default_config, + overrides, + config_subdir=None, +): + if config_subdir is not None: + config_folder = Path(config_folder, config_subdir) + config_folder = config_folder.resolve().as_posix() + with initialize_config_dir(config_dir=config_folder, version_base="1.3"): + cfg = compose(config_name=default_config, overrides=overrides) + cfg = OmegaConf.to_container(cfg, resolve=False) + return cfg + + +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--params_file", type=str, default=True) - parser.add_argument("--study_type", type=str, default="optuna") + parser.add_argument("--study_csv", type=str, default=None) parser.add_argument("--config_folder", type=str, default=Path(Path(), "conf")) parser.add_argument("--default_config", type=str, default="default") @@ -91,25 +148,22 @@ def find_optuna_best( parser.add_argument("--study_name", type=str, required=True) parser.add_argument("--config_name", type=str) parser.add_argument("--verbosity", type=str, default="INFO") + parser.add_argument("--storage_name", type=str, required=True) + parser.add_argument("--direction", type=str, default="maximize") + parser.add_argument("--study_type", type=str, default="optuna") args = parser.parse_args() args.config_folder = Path(args.config_folder).resolve().as_posix() - + logging if args.study_type == "optuna": - with open( - Path(args.config_folder, args.default_config).with_suffix(".yaml"), - "r", - ) as f: - default_params = yaml.load(f, Loader=yaml.FullLoader) - if "hydra" in default_params: - hydra_params = default_params.pop("hydra") - study_name = args.study_name - storage_name = hydra_params["sweeper"]["storage"] - direction = default_params.get("direction", "maximize") + storage_name = args.storage_name + direction = args.direction + if len(direction) == 1: + direction = direction[0] find_optuna_best( - study_name=study_name, - storage_name=storage_name, + 
study_name=args.study_name, + storage_name=args.storage_name, study_csv=args.study_csv, params_file=args.params_file, config_folder=args.config_folder, diff --git a/deckard/layers/model.py b/deckard/layers/model.py new file mode 100644 index 00000000..5b098570 --- /dev/null +++ b/deckard/layers/model.py @@ -0,0 +1,53 @@ +#! /usr/bin/env python + +import logging +from pathlib import Path +import argparse + + +from .utils import save_params_file, run_stages + +logger = logging.getLogger(__name__) + +model_parser = argparse.ArgumentParser() +model_parser.add_argument("stage", type=str, nargs="*", default=None) +model_parser.add_argument("--verbosity", type=str, default="INFO") +model_parser.add_argument("--params_file", type=str, default="params.yaml") +model_parser.add_argument("--pipeline_file", type=str, default="dvc.yaml") +model_parser.add_argument("--config_dir", type=str, default="conf") +model_parser.add_argument("--config_file", type=str, default="default") +model_parser.add_argument("--workdir", type=str, default=".") +model_parser.add_argument("--overrides", nargs="*", default=[], type=str) + + +def model_main(args): + config_dir = Path(args.workdir, args.config_dir).absolute().as_posix() + logging.basicConfig( + level=args.verbosity, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + if args.overrides is not None and len(args.overrides) > 0: + save_params_file( + config_dir=config_dir, + config_file=args.config_file, + params_file=args.params_file, + overrides=args.overrides, + ) + logger.info( + f"Using existing params file {args.params_file} in directory {args.workdir}", + ) + results = run_stages( + stages=args.stage, + pipeline_file=args.pipeline_file, + params_file=args.params_file, + repo=args.workdir, + config_dir=config_dir, + config_file=args.config_file, + sub_dict="model", + ) + return results + + +if __name__ == "__main__": + args = model_parser.parse_args() + model_main(args) diff --git a/deckard/layers/optimise.py 
b/deckard/layers/optimise.py index 6b0280e7..71bf6893 100644 --- a/deckard/layers/optimise.py +++ b/deckard/layers/optimise.py @@ -32,7 +32,9 @@ def get_files( stage, ): """ - Gets the file names from + Gets the file names from cfg and calculates the hash of the attack, model and data, and files objects. + If "files.name == 'default'", the name is set to the hash of the cfg. + For attack, model and data, the file name is set to the hash of the respective object. """ if isinstance(cfg, dict): pass @@ -74,7 +76,9 @@ def get_files( cfg["files"]["_target_"] = "deckard.base.files.FileConfig" id_ = my_hash(cfg) cfg["name"] = id_ - cfg["files"]["name"] = id_ + cfg["files"]["name"] = ( + id_ if cfg["files"]["name"] == "default" else cfg["files"]["name"] + ) if stage is not None: cfg["files"]["stage"] = stage return cfg @@ -165,36 +169,6 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict: else: assert isinstance(stage, list), f"args.stage is of type {type(stage)}" stages = stage - # if params is None: - # with open(Path(path, "params.yaml"), "r") as f: - # default_params = yaml.load(f, Loader=yaml.FullLoader) - # key_list = [] - # for stage in stages: - # with open(Path(path, "dvc.yaml"), "r") as f: - # new_keys = yaml.load(f, Loader=yaml.FullLoader)["stages"][stage][ - # "params" - # ] - # key_list.extend(new_keys) - # params = read_subset_of_params(key_list, params) - # params = merge_params(default_params, params) - # elif isinstance(params, str) and Path(params).is_file() and Path(params).exists(): - # with open(Path(params), "r") as f: - # params = yaml.load(f, Loader=yaml.FullLoader) - # assert isinstance( - # params, - # dict, - # ), f"Params in file {params} must be a dict. It is a {type(params)}." 
- # key_list = [] - # for stage in stages: - # with open(Path(path, "dvc.yaml"), "r") as f: - # new_keys = yaml.load(f, Loader=yaml.FullLoader)["stages"][stage][ - # "params" - # ] - # key_list.extend(new_keys) - # with open(Path(path, "params.yaml"), "r") as f: - # all_params = yaml.load(f, Loader=yaml.FullLoader) - # default_params = read_subset_of_params(key_list, all_params) - # params = merge_params(default_params, params) if isinstance(params, dict): key_list = [] for stage in stages: @@ -228,11 +202,14 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict: files = params["files"] file_list = list(files.keys()) for key in file_list: - template_string = "${files." + key + "}" - if template_string in file_string: - pass - else: - params["files"].pop(key) + if key == "params.yaml": + continue + if key.endswith("_file") or key.endswith("_dir"): + template_string = "${files." + key + "}" + if template_string in file_string: + pass + else: + params["files"].pop(key) params = get_files(params, stage) return params @@ -265,11 +242,13 @@ def write_stage(params: dict, stage: str, path=None, working_dir=None) -> None: def optimise(cfg: DictConfig) -> None: cfg = OmegaConf.to_container(OmegaConf.create(cfg), resolve=True) - raise_exception = cfg.pop("raise_exception", False) + raise_exception = cfg.pop("raise_exception", True) working_dir = Path(config_path).parent direction = cfg.get("direction", "minimize") direction = [direction] if not isinstance(direction, list) else direction optimizers = cfg.get("optimizers", None) + optimizers = [optimizers] if not isinstance(optimizers, list) else optimizers + assert len(optimizers) == len(direction) stage = cfg.pop("stage", None) cfg = parse_stage(params=cfg, stage=stage, path=working_dir) exp = instantiate(cfg) @@ -288,36 +267,39 @@ def optimise(cfg: DictConfig) -> None: scores.append(score_dict[optimizer]) else: if direction[i] == "minimize": - scores.append(1e10) + scores.append(1.00000000000) elif 
direction[i] == "maximize": - scores.append(-1e10) + scores.append(0.00000000000) else: scores.append(None) i += 1 logger.info(f"Optimizers are : {optimizers}") logger.info(f"Score is : {scores}") except Exception as e: - logger.warning( - f"Exception {e} occured while running experiment {id_}. Setting score to default for specified direction (e.g. -/+ 1e10).", - ) with open(Path(folder, "exception.log"), "w") as f: f.write(str(e)) f.write(traceback.format_exc()) - fake_scores = [] - for direction in direction: - if direction == "minimize": - fake_scores.append(1e10) - elif direction == "maximize": - fake_scores.append(-1e10) - else: - fake_scores.append(None) - scores = fake_scores - logger.info(f"Optimizers: {optimizers}") - logger.info(f"Score: {scores}") - if raise_exception: + if not raise_exception: + logger.warning( + f"Exception {e} occured while running experiment {id_}. Setting score to default for specified direction (e.g. -/+ 1e10).", + ) + fake_scores = [] + for direction in direction: + if direction == "minimize": + fake_scores.append(1.00000000000) + elif direction == "maximize": + fake_scores.append(0.00000000000) + else: + fake_scores.append(None) + scores = fake_scores + logger.info(f"Optimizers: {optimizers}") + logger.info(f"Score: {scores}") + else: raise e if len(scores) == 1: - scores = scores[0] + scores = float(scores[0]) + else: + scores = [float(x) for x in scores] return scores diff --git a/deckard/layers/parse.py b/deckard/layers/parse.py index 3e879cd5..44a2200b 100644 --- a/deckard/layers/parse.py +++ b/deckard/layers/parse.py @@ -14,10 +14,8 @@ hydra_parser.add_argument("--config_file", type=str, default="default") hydra_parser.add_argument("--workdir", type=str, default=".") -if __name__ == "__main__": - logger = logging.getLogger(__name__) - args = hydra_parser.parse_args() +def parse_hydra_config(args) -> None: logging.basicConfig(level=args.verbosity) config_dir = Path(Path(), args.config_dir).resolve().as_posix() 
OmegaConf.register_new_resolver("eval", eval) @@ -32,3 +30,9 @@ ) os.environ["DECKARD_DEFAULT_CONFIG"] = args.config_file os.environ["DECKARD_CONFIG_PATH"] = args.config_dir + return None + + +if __name__ == "__main__": + args = hydra_parser.parse_args() + parse_hydra_config(args) diff --git a/deckard/layers/plots.py b/deckard/layers/plots.py index fcfa03a4..99f77e52 100644 --- a/deckard/layers/plots.py +++ b/deckard/layers/plots.py @@ -25,7 +25,7 @@ def cat_plot( hue_order=None, rotation=0, set={}, - filetype=".pdf", + filetype=".eps", **kwargs, ): """ @@ -66,27 +66,45 @@ def cat_plot( plot. You can pass any valid keyword arguments that are accepted by the `set()` method of the `seaborn.FacetGrid` object. These properties can be used to customize the appearance of the plot, filetype: The `filetype` parameter is used to specify the file extension for saving the graph. By - default, it is set to ".pdf", but you can change it to any other valid file extension such as - ".png", ".jpg", etc. Defaults to .pdf + default, it is set to ".eps", but you can change it to any other valid file extension such as + ".png", ".jpg", etc. 
Defaults to .eps """ plt.gcf().clear() - file = Path(file).with_suffix(filetype) + suffix = Path(file).suffix + if suffix is not None: + file = Path(file) + else: + file = Path(file).with_suffix(filetype) logger.info(f"Rendering graph {file}") - data = data.sort_values(by=[hue, x, y]) - logger.debug( - f"Data sorted by x:{x}, y:{y}, hue:{hue}, kind:{kind}, hue_order:{hue_order}, and kwargs:{kwargs}.", - ) - graph = sns.catplot( - data=data, x=x, y=y, hue=hue, kind=kind, hue_order=hue_order, **kwargs - ) + if hue is not None: + data = data.sort_values(by=[hue, x, y]) + logger.debug( + f"Data sorted by x:{x}, y:{y}, hue:{hue}, kind:{kind}, hue_order:{hue_order}, and kwargs:{kwargs}.", + ) + graph = sns.catplot( + data=data, + x=x, + y=y, + hue=hue, + kind=kind, + hue_order=hue_order, + **kwargs, + ) + else: + data = data.sort_values(by=[x, y]) + logger.debug(f"Data sorted by x:{x}, y:{y}, kind:{kind}, and kwargs:{kwargs}.") + graph = sns.catplot(data=data, x=x, y=y, kind=kind, **kwargs) graph.set_xlabels(xlabels) graph.set_ylabels(ylabels) graph.set_titles(titles) if legend_title is not None: graph.legend.set_title(title=legend_title) else: - graph.legend.remove() + if graph.legend is not None: + graph.legend.remove() + else: + pass graph.set_xticklabels(graph.axes.flat[-1].get_xticklabels(), rotation=rotation) graph.set(**set) graph.tight_layout() @@ -99,7 +117,6 @@ def line_plot( data, x, y, - hue, xlabel, ylabel, title, @@ -108,8 +125,7 @@ def line_plot( y_scale=None, x_scale=None, legend={}, - hue_order=None, - filetype=".pdf", + filetype=".eps", **kwargs, ): """ @@ -146,21 +162,35 @@ def line_plot( variable in the plot. It is a list that determines the order in which the different categories of the `hue` variable will be plotted. filetype: The `filetype` parameter specifies the file type of the saved graph. In the given code, - the default value is set to ".pdf", indicating that the graph will be saved as a PDF file. 
However, - you can change the value of `filetype` to save the graph in a different. Defaults to .pdf + the default value is set to ".eps", indicating that the graph will be saved as a PDF file. However, + you can change the value of `filetype` to save the graph in a different. Defaults to .eps Returns: the line plot graph object. """ plt.gcf().clear() - file = Path(file).with_suffix(filetype) + suffix = Path(file).suffix + if suffix is not None: + file = Path(file) + else: + file = Path(file).with_suffix(filetype) logger.info(f"Rendering graph {file}") - data = data.sort_values(by=[hue, x, y]) - graph = sns.lineplot(data=data, x=x, y=y, hue=hue, hue_order=hue_order, **kwargs) + if "hue" in kwargs and kwargs.get("hue") in data.columns: + hue = kwargs.get("hue") + data = data.sort_values(by=[hue, x, y]) + else: + data.sort_values(by=[x, y]) + xlim = kwargs.pop("xlim", None) + ylim = kwargs.pop("ylim", None) + graph = sns.lineplot(data=data, x=x, y=y, **kwargs) graph.legend(**legend) graph.set_xlabel(xlabel) graph.set_ylabel(ylabel) graph.set_title(title) + if xlim is not None: + graph.set_xlim(xlim) + if ylim is not None: + graph.set_ylim(ylim) if y_scale is not None: graph.set_yscale(y_scale) if x_scale is not None: @@ -186,7 +216,7 @@ def scatter_plot( x_scale=None, legend={}, hue_order=None, - filetype=".pdf", + filetype=".eps", **kwargs, ): """ @@ -221,15 +251,19 @@ def scatter_plot( variable in the scatter plot. By default, the levels of the `hue` variable are ordered based on the order in which they appear in the data. However, if you want to specify a specific filetype: The `filetype` parameter is a string that specifies the file type of the saved graph. It - is used to determine the file extension of the saved graph file. By default, it is set to ".pdf", - indicating that the graph will be saved as a PDF file. However, you can change. Defaults to .pdf + is used to determine the file extension of the saved graph file. 
By default, it is set to ".eps", + indicating that the graph will be saved as a PDF file. However, you can change. Defaults to .eps Returns: the scatter plot graph object. """ plt.gcf().clear() - file = Path(file).with_suffix(filetype) + suffix = Path(file).suffix + if suffix is not None: + file = Path(file) + else: + file = Path(file).with_suffix(filetype) logger.info(f"Rendering graph {file}") data = data.sort_values(by=[hue, x, y]) graph = sns.scatterplot( @@ -240,8 +274,10 @@ def scatter_plot( hue_order=hue_order, **kwargs, ) - graph.set_yscale(y_scale) - graph.set_xscale(x_scale) + if y_scale is not None: + graph.set_yscale(y_scale) + if x_scale is not None: + graph.set_xscale(x_scale) graph.set_xlabel(xlabel) graph.set_ylabel(ylabel) graph.legend(**legend) @@ -254,42 +290,43 @@ def scatter_plot( return graph -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-p", - "--path", - type=str, - help="Path to the plot folder", - required=True, - ) - parser.add_argument( - "-f", - "--file", - type=str, - help="Data file to read from", - required=True, - ) - parser.add_argument( - "-t", - "--plotfiletype", - type=str, - help="Filetype of the plots", - default=".pdf", - ) - parser.add_argument( - "-v", - "--verbosity", - default="INFO", - help="Increase output verbosity", - ) - parser.add_argument( - "-c", - "--config", - help="Path to the config file", - default="conf/plots.yaml", - ) - args = parser.parse_args() +parser = argparse.ArgumentParser() +parser.add_argument( + "-p", + "--path", + type=str, + help="Path to the plot folder", + required=True, +) +parser.add_argument( + "-f", + "--file", + type=str, + help="Data file to read from", + required=True, +) +parser.add_argument( + "-t", + "--plotfiletype", + type=str, + help="Filetype of the plots", + default=".eps", +) +parser.add_argument( + "-v", + "--verbosity", + default="INFO", + help="Increase output verbosity", +) +parser.add_argument( + "-c", + "--config", + 
help="Path to the config file", + default="conf/plots.yaml", +) + + +def main(args): logging.basicConfig(level=args.verbosity) assert Path( args.file, @@ -334,3 +371,8 @@ def scatter_plot( for dict_ in scatter_plot_list: i += 1 scatter_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE) + + +if __name__ == "__main__": + args = parser.parse_args() + main(args) diff --git a/deckard/layers/prometheus.py b/deckard/layers/prometheus.py index 7e0171d4..68e20092 100644 --- a/deckard/layers/prometheus.py +++ b/deckard/layers/prometheus.py @@ -1,185 +1,188 @@ -import experiments.libs.functions -from prometheus_api_client import PrometheusConnect -from datetime import datetime -from dataclasses import dataclass -from pathlib import Path -import yaml +# import experiments.libs.functions +# from prometheus_api_client import PrometheusConnect +# from datetime import datetime +# from dataclasses import dataclass +# from pathlib import Path +# import yaml -@dataclass -class PromQuery: - prom_host = "labumu.se" - prom_port = "30090" - prom_address = "http://" + prom_host + ":" + prom_port + "/" - warmup = 9000 - warmdown = 3000 - step = 5 - query = "" - start = 0 - end = 0 - service = "" - namespace = "" - percentile = "" - reporter = "source" - response_code = "" +# @dataclass +# class PromQuery: +# prom_host = "labumu.se" +# prom_port = "30090" +# prom_address = "http://" + prom_host + ":" + prom_port + "/" +# warmup = 9000 +# warmdown = 3000 +# step = 5 +# query = "" +# start = 0 +# end = 0 +# service = "" +# namespace = "" +# percentile = "" +# reporter = "source" +# response_code = "" - def query_prometheus(self): - """ - This function collects data in prometheus for a given query, in a given time interval, with a given - warmup/warmdown time offset and a given step. 
- :return: - """ - prom = PrometheusConnect(url=self.prom_address, disable_ssl=True) - start = datetime.fromtimestamp((self.start + self.warmup) / 1000) - end = datetime.fromtimestamp((self.end - self.warmdown) / 1000) +# def query_prometheus(self): +# """ +# This function collects data in prometheus for a given query, in a given time interval, with a given +# warmup/warmdown time offset and a given step. +# :return: +# """ +# prom = PrometheusConnect(url=self.prom_address, disable_ssl=True) +# start = datetime.fromtimestamp((self.start + self.warmup) / 1000) +# end = datetime.fromtimestamp((self.end - self.warmdown) / 1000) - result = prom.custom_query_range( - query=self.query, start_time=start, end_time=end, step=self.step - ) - return result +# result = prom.custom_query_range( +# query=self.query, +# start_time=start, +# end_time=end, +# step=self.step, +# ) +# return result - def get_response_time(self, version=None): - """ - This function will get the response time for a given service in a given time interval and based on a given - percentile - :return: - """ - if version == None: - version = "latest" - if self.response_code == "": - self.query = ( - "(histogram_quantile(" - + str(self.percentile) - + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' - + self.reporter - + '", destination_service=~"' - + self.service - + "." - + self.namespace - + '.svc.cluster.local", destination_canonical_revision="' - + version - + '"}[1m])) ' - "by (le)) / 1000)" - ) - elif self.response_code == "200": - self.query = ( - "(histogram_quantile(" - + str(self.percentile) - + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' - + self.reporter - + '", destination_service=~"' - + self.service - + "." 
- + self.namespace - + '.svc.cluster.local",' - 'response_code="' - + self.response_code - + '", destination_canonical_revision="' - + version - + '"}[1m])) by (le)) / 1000)' - ) - else: - self.query = ( - "(histogram_quantile(" - + str(self.percentile) - + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' - + self.reporter - + '", destination_service=~"' - + self.service - + "." - + self.namespace - + '.svc.cluster.local",' - 'response_code!="200", destination_canonical_revision="' - + version - + '"}[1m])) by (le)) / 1000)' - ) +# def get_response_time(self, version=None): +# """ +# This function will get the response time for a given service in a given time interval and based on a given +# percentile +# :return: +# """ +# if version == None: +# version = "latest" +# if self.response_code == "": +# self.query = ( +# "(histogram_quantile(" +# + str(self.percentile) +# + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' +# + self.reporter +# + '", destination_service=~"' +# + self.service +# + "." +# + self.namespace +# + '.svc.cluster.local", destination_canonical_revision="' +# + version +# + '"}[1m])) ' +# "by (le)) / 1000)" +# ) +# elif self.response_code == "200": +# self.query = ( +# "(histogram_quantile(" +# + str(self.percentile) +# + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' +# + self.reporter +# + '", destination_service=~"' +# + self.service +# + "." +# + self.namespace +# + '.svc.cluster.local",' +# 'response_code="' +# + self.response_code +# + '", destination_canonical_revision="' +# + version +# + '"}[1m])) by (le)) / 1000)' +# ) +# else: +# self.query = ( +# "(histogram_quantile(" +# + str(self.percentile) +# + ', sum(irate(istio_request_duration_milliseconds_bucket{reporter="' +# + self.reporter +# + '", destination_service=~"' +# + self.service +# + "." 
+# + self.namespace +# + '.svc.cluster.local",' +# 'response_code!="200", destination_canonical_revision="' +# + version +# + '"}[1m])) by (le)) / 1000)' +# ) - result = self.query_prometheus() - return result +# result = self.query_prometheus() +# return result - def get_status_codes(self, version=None): - """ - This function will get the request status codes for agiven service, in agiven time interval with a given - warmup/warmdown time offset and a given step +# def get_status_codes(self, version=None): +# """ +# This function will get the request status codes for agiven service, in agiven time interval with a given +# warmup/warmdown time offset and a given step - """ - if version == None: - version = "latest" - self.query = ( - 'round(sum(irate(istio_requests_total{destination_service=~"' - + self.service - + "" - "." - + self.namespace - + '.svc.cluster.local", reporter="source", destination_canonical_revision="' - + version - + '"}[1m])) by (response_code, response_flags), 0.001)' - ) - result = self.query_prometheus() - return result +# """ +# if version == None: +# version = "latest" +# self.query = ( +# 'round(sum(irate(istio_requests_total{destination_service=~"' +# + self.service +# + "" +# "." 
+# + self.namespace +# + '.svc.cluster.local", reporter="source", destination_canonical_revision="' +# + version +# + '"}[1m])) by (response_code, response_flags), 0.001)' +# ) +# result = self.query_prometheus() +# return result - def get_retried_requests(self, port, version=""): - """ - This function gets the number of retried requests for a given service, in given time interval with a given - warmup/warmdown time offset and a given step - """ - self.query = ( - 'round(sum(irate(envoy_cluster_upstream_rq_retry{cluster_name="outbound|' - + str(port) - + "|" - + version - + "|" - + self.service - + '.default.svc.cluster.local"}[1m])) by (), 0.001)' - ) - result = self.query_prometheus() - return result +# def get_retried_requests(self, port, version=""): +# """ +# This function gets the number of retried requests for a given service, in given time interval with a given +# warmup/warmdown time offset and a given step +# """ +# self.query = ( +# 'round(sum(irate(envoy_cluster_upstream_rq_retry{cluster_name="outbound|' +# + str(port) +# + "|" +# + version +# + "|" +# + self.service +# + '.default.svc.cluster.local"}[1m])) by (), 0.001)' +# ) +# result = self.query_prometheus() +# return result - def get_requests_in_queue(self): - """ - This function will get the request in the queue for a given service - """ - self.query = ( - 'round(sum(irate(envoy_http_inbound_0_0_0_0_5000_downstream_rq_active{app=~"' - + self.service - + '"}[1m])) by (service_istio_io_canonical_name), 0.001)' - ) - result = self.query_prometheus() - return result +# def get_requests_in_queue(self): +# """ +# This function will get the request in the queue for a given service +# """ +# self.query = ( +# 'round(sum(irate(envoy_http_inbound_0_0_0_0_5000_downstream_rq_active{app=~"' +# + self.service +# + '"}[1m])) by (service_istio_io_canonical_name), 0.001)' +# ) +# result = self.query_prometheus() +# return result - def get_current_queue_size(self, job="istio"): - """ - This function will get the 
current queue size which is pushed in pushgateway (HTTP2MaxRequests) - """ - self.query = 'destination_rule_http2_max_requests{exported_job="' + job + '"}' - result = self.query_prometheus() - return result +# def get_current_queue_size(self, job="istio"): +# """ +# This function will get the current queue size which is pushed in pushgateway (HTTP2MaxRequests) +# """ +# self.query = 'destination_rule_http2_max_requests{exported_job="' + job + '"}' +# result = self.query_prometheus() +# return result - def get_retry_attempt(self): - """ - This function get the retry attempt which is pushed in pushgateway (attempts) - """ - self.query = "retry_attempts_" + self.service - result = self.query_prometheus() - return result +# def get_retry_attempt(self): +# """ +# This function get the retry attempt which is pushed in pushgateway (attempts) +# """ +# self.query = "retry_attempts_" + self.service +# result = self.query_prometheus() +# return result - def __call__(self, config_file, output_file, output_folder) -> None: - """ - This function will call the prometheus query function and write the result in a given file - """ - # Available metrics: - # train_time, train_start_time, train_end_time, - # predict_proba_time, predict_proba_start_time, predict_proba_end_time, - # adv_train_time, adv_train_start_time, adv_train_end_time, - # adv_predict_proba_time, adv_predict_proba_start_time, adv_predict_proba_end_time, - # Find all output_file recursively inside output_folder - with open(config_file, "r") as f: - config = yaml.load(f, Loader=yaml.FullLoader) - files = Path(output_folder).rglob(output_file) - # Each file will have train_start_time train_end_time, predict_proba_start_time predict_predict_proba_end_time, adv_ - # Query Prometheus - # Do calulations - # Write to file - # Use a lambda function so that this will be parallelized across all the files in the files iterator and across each entry of the config - # Return None - None +# def __call__(self, config_file, 
output_file, output_folder) -> None: +# """ +# This function will call the prometheus query function and write the result in a given file +# """ +# # Available metrics: +# # train_time, train_start_time, train_end_time, +# # predict_proba_time, predict_proba_start_time, predict_proba_end_time, +# # adv_train_time, adv_train_start_time, adv_train_end_time, +# # adv_predict_proba_time, adv_predict_proba_start_time, adv_predict_proba_end_time, +# # Find all output_file recursively inside output_folder +# with open(config_file, "r") as f: +# config = yaml.load(f, Loader=yaml.FullLoader) +# files = Path(output_folder).rglob(output_file) +# # Each file will have train_start_time train_end_time, predict_proba_start_time predict_predict_proba_end_time, adv_ +# # Query Prometheus +# # Do calulations +# # Write to file +# # Use a lambda function so that this will be parallelized across all the files in the files iterator and across each entry of the config +# # Return None +# None diff --git a/deckard/layers/utils.py b/deckard/layers/utils.py index fc72e9a7..2217f785 100644 --- a/deckard/layers/utils.py +++ b/deckard/layers/utils.py @@ -1,15 +1,20 @@ import logging from pathlib import Path +import re from hydra.errors import OverrideParseException from omegaconf import OmegaConf +from omegaconf import SCMode from copy import deepcopy import yaml from hydra import initialize_config_dir, compose +import dvc.api +from hydra.utils import instantiate +from dulwich.errors import NotGitRepository from numpy import nan -from ..base.utils import my_hash +from ..base.utils import my_hash, flatten_dict, unflatten_dict logger = logging.getLogger(__name__) @@ -59,7 +64,7 @@ def find_conf_files( return files -def get_overrides(file: str, key: str = None, overrides=None): +def get_overrides(overrides=None): if overrides is None: overrides = {} else: @@ -137,14 +142,257 @@ def save_params_file( config_dir="conf", config_file="default", params_file="params.yaml", + working_directory=".", 
overrides=[], ): - config_dir = str(Path(Path(), config_dir).absolute().as_posix()) + config_dir = str(Path(working_directory, config_dir).absolute().as_posix()) + logger.info(f"Running save_params_file in config_dir: {config_dir}") with initialize_config_dir(config_dir=config_dir, version_base="1.3"): cfg = compose(config_name=config_file, overrides=overrides) - params = OmegaConf.to_container(cfg, resolve=True) - with open(params_file, "w") as f: - yaml.dump(params, f) - logger.info(f"Saved params file to {params_file}") + params = OmegaConf.to_container( + cfg, + resolve=True, + structured_config_mode=SCMode.DICT, + ) + with open(params_file, "w") as f: + yaml.dump(params, f) + logger.info(f"Saved params file to {params_file}") assert Path(params_file).exists(), f"Failed to save params file to {params_file}" return None + + +def get_dvc_stage_params( + stage, + params_file="params.yaml", + pipeline_file="dvc.yaml", + directory=".", + name=None, +): + logger.info( + f"Getting params for stage {stage} from {params_file} and {pipeline_file} in {directory}.", + ) + params = dvc.api.params_show(stages=stage, repo=directory) + params.update({"_target_": "deckard.base.experiment.Experiment"}) + params = OmegaConf.to_container(OmegaConf.create(params), resolve=True) + flat_params = flatten_dict(params) + pipe_params = dvc.api.params_show(pipeline_file, stages=stage, repo=directory)[ + "stages" + ][stage] + file_list = [] + for key in ["metrics", "deps", "outs", "plots"]: + param_string = str(pipe_params.get(key, {})) + # find all values within ${} and add them to file_list + file_list.extend(re.findall(r"\${(.*?)}", param_string)) + file_dict = {} + for k in file_list: + if k in flat_params: + file_dict[k] = flat_params[k] + else: + raise ValueError(f"File {k} not found in {pipe_params.keys()}") + file_dict = unflatten_dict(file_dict) + params["files"] = file_dict.pop("files", {}) + params["files"]["stage"] = stage + # Merge remaining params + params = 
OmegaConf.merge(params, file_dict) + params = OmegaConf.to_container(OmegaConf.create(params), resolve=True) + if name is not None: + params["files"]["name"] = name + return params + + +# def get_dvc_stage_params( +# stage, +# params_file="params.yaml", +# pipeline_file="dvc.yaml", +# directory=".", +# name=None, +# ): +# logger.info( +# f"Getting params for stage {stage} from {params_file} and {pipeline_file} in {directory}.", +# ) +# params = dvc.api.params_show(stages=stage) +# params.update({"_target_": "deckard.base.experiment.Experiment"}) +# pipe_params = dvc.api.params_show(pipeline_file, stages=stage, repo=directory) +# pipe_params = unflatten_dict(pipe_params) +# params["files"] = dict(pipe_params.pop("files", pipe_params)) +# params["files"]["_target_"] = "deckard.base.files.FileConfig" +# params["files"]["stage"] = stage +# params["stage"] = stage +# if name is not None: +# params["files"]["name"] = name +# # Merge remaining params +# params = OmegaConf.merge(params, pipe_params) +# return params + + +def prepare_files(params_file, stage, params, id_): + # Turns the dictionary into a FileConfig object. 
+ # This creates a new directory at files.directory + # It also creates a new directory at files.directory/files.data_dir + # It also creates a new directory at files.directory/files.reports_dir + # If a stage is specified, it also creates a new directory at files.directory/files.reports/stage + params["files"]["_target_"] = "deckard.base.files.FileConfig" + params["files"]["stage"] = stage + params["files"]["name"] = ( + id_ if params["files"].get("name", None) is None else params["files"]["name"] + ) + params["files"]["params_file"] = Path(params_file).name + # This creates a the object + files = instantiate(params["files"]) + # Which will return the dictionary of the files + files = files.get_filenames() + # If the params_file is in the files, then the params_file is the params_file + if "params_file" in files: + params_file = files["params_file"] + # Otherwise we take the folder of the score_dict_file and change the name to whatever the params_file is + elif "score_dict_file" in files: + params_file = Path(files["score_dict_file"]).with_name(params_file) + else: + raise ValueError( + f"Neither params_file nor score_dict_file found in {list(files.keys())}.", + ) + + # Save the params to the params_file + Path(params_file).parent.mkdir(exist_ok=True, parents=True) + with Path(params_file).open("w") as f: + yaml.dump(params, f) + return files + + +def get_stages(pipeline_file="dvc.yaml", stages=None, repo=None): + try: + def_stages = list( + dvc.api.params_show(pipeline_file, repo=repo)["stages"].keys(), + ) + except NotGitRepository: + raise ValueError( + f"Directory {repo} is not a dvc repository. Please run `dvc init` in {repo} and try again.", + ) + if stages is None or stages == []: + logger.info("No stages specified. 
Running default from hydra configuration") + stages = [None] + elif isinstance(stages, str): + stages = [stages] + else: + assert isinstance(stages, list), f"args.stage is of type {type(stages)}" + for stage in stages: + assert ( + stage in def_stages + ), f"Stage {stage} not found in {pipeline_file}. Available stages: {def_stages}" + return stages + + +def get_params_from_disk( + params_file, + pipeline_file, + directory, + stage, + config_dir, + config_file, +): + if stage is not None: + params = get_dvc_stage_params( + stage=stage, + params_file=params_file, + pipeline_file=pipeline_file, + directory=directory, + ) + else: + # Use hydras compose to get the params + assert config_dir is not None, "config_dir must be specified if stage is None" + with initialize_config_dir( + config_dir=config_dir, + job_name=Path(config_file).stem, + version_base="1.3", + ): + cfg = compose(config_name=config_file) + params = OmegaConf.to_container(cfg, resolve=True) + params["files"] = dict(params.pop("files", params)) + params["files"]["_target_"] = "deckard.base.files.FileConfig" + params["files"]["stage"] = None + params["stage"] = None + return params + + +def run_stage( + params_file="params.yaml", + pipeline_file="dvc.yaml", + directory=".", + stage=None, + overrides=None, + config_dir=None, + config_file=None, + sub_dict=None, +): + logger.info( + f"Running stage {stage} with params_file: {params_file} and pipeline_file: {pipeline_file} in directory {directory}", + ) + params = get_params_from_disk( + params_file, + pipeline_file, + directory, + stage, + config_dir, + config_file, + ) + params = add_overrides(overrides, params) + if sub_dict is None: + params["_target_"] = "deckard.experiment.Experiment" + exp = instantiate(params) + id_ = exp.name + _ = prepare_files(params_file, stage, params, id_) + score = exp() + else: + possible_subdicts = ["data", "model", "attack", "scorers", "plots", "files"] + assert ( + sub_dict in possible_subdicts + ), f"sub_dict must be one 
of {possible_subdicts}" + target = f"deckard.{sub_dict}.{sub_dict.capitalize()}" + params["_target_"] = target + exp = instantiate(params[sub_dict]) + id_ = exp.name + files = params["files"] + params[sub_dict]["files"] = files + files = prepare_files(params_file, stage, params[sub_dict], id_) + score = exp(**files) + return id_, score + + +def add_overrides(overrides, params): + old_params = deepcopy(params) + if overrides is not None and len(overrides) > 0: + # convert from dot notation to nested dict + overrides = OmegaConf.from_dotlist(overrides) + params = OmegaConf.merge(params, overrides) + params = OmegaConf.to_container(params, resolve=True) + assert ( + params != old_params + ), f"Params are the same as before overrides: {overrides}" + params = OmegaConf.create(params) + params = OmegaConf.to_container(params, resolve=True) + return params + + +def run_stages( + stages, + pipeline_file="dvc.yaml", + params_file="params.yaml", + repo=None, + config_dir=None, + config_file=None, + sub_dict=None, +): + results = {} + stages = get_stages(stages=stages, pipeline_file=pipeline_file, repo=repo) + for stage in stages: + id_, score = run_stage( + stage=stage, + pipeline_file=pipeline_file, + params_file=params_file, + directory=repo, + config_dir=config_dir, + config_file=config_file, + sub_dict=sub_dict, + ) + results[id_] = score + return results diff --git a/examples/bit_depth/conf/compile.yaml b/examples/bit_depth/conf/compile.yaml index 1433e2cd..1d325efa 100644 --- a/examples/bit_depth/conf/compile.yaml +++ b/examples/bit_depth/conf/compile.yaml @@ -29,3 +29,35 @@ params: FSQ: model.art.pipeline.preprocessor.bit_depth Gauss-in: model.art.pipeline.preprocessor.sigma Control: model_layers + +attacks: + # CarliniL0Method: CW_0 + # CarliniL2Method: CW_2 + # CarliniLInfMethod: CW_inf + DeepFool: Deep + FastGradientMethod: FGM + HopSkipJump: HSJ + PixelAttack: Pixel + ProjectedGradientDescent: PGD + ThresholdAttack: Thresh +defences: + Control: Control + 
FeatureSqueezing: FSQ + GaussianAugmentation: Gauss-in + GaussianNoise: Gauss-out + HighConfidence: Conf +params: + # art.attacks.evasion.CarliniL0Method: attack.init.confidence + # art.attacks.evasion.CarliniL2Method: attack.init.confidence + # art.attacks.evasion.CarliniLInfMethod: attack.init.confidence + Deep: attack.init.nb_grads + FGM: attack.init.eps + HSJ: attack.init.max_iter + Pixel: attack.init.th + PGD: attack.init.eps + Thresh: attack.init.th + Gauss-out: model.art.pipeline.postprocessor.scale + Conf: model.art.pipeline.postprocessor.cutoff + FSQ: model.art.pipeline.preprocessor.bit_depth + Gauss-in: model.art.pipeline.preprocessor.sigma + Control: model_layers diff --git a/examples/gzip/.dvc/.gitignore b/examples/gzip/.dvc/.gitignore new file mode 100644 index 00000000..528f30c7 --- /dev/null +++ b/examples/gzip/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/examples/gzip/.dvcignore b/examples/gzip/.dvcignore new file mode 100644 index 00000000..51973055 --- /dev/null +++ b/examples/gzip/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/examples/gzip/.gitignore b/examples/gzip/.gitignore new file mode 100644 index 00000000..8e6ca79d --- /dev/null +++ b/examples/gzip/.gitignore @@ -0,0 +1,11 @@ +output +*.db +kdd_nsl +truthseeker +ddos +sms_spam +/raw_data +.dvc/config +2-22/ +2-28/ +/params.yaml diff --git a/examples/gzip/README.md b/examples/gzip/README.md new file mode 100644 index 00000000..43af47f2 --- /dev/null +++ b/examples/gzip/README.md @@ -0,0 +1,70 @@ + + +# Installation +You should probably be using a virtual environment rather than installing things globally. Why? 1. Because you can just remove this folder afterwards to delete all traces of this software. 2. Your system might have python3 rather than python even though python2 is long dead.
The dvc.yaml file contains scripts that are executed in whatever environment you run the `dvc repro` (see below) command from, and changing the python interpreter of the dvc command won't change that. You're welcome to change the call to the binary in each cmd or do things the right way: + + +If you don't already have a preferred environment manager (e.g. conda), I recommend venv. You might need to install the operating system dependencies and python package with: +``` +sudo apt-get install python3-venv +python3 -m pip install venv +``` +You create a virtual environment in the folder `env` with: +``` +python3 -m venv env +``` +Then activate it: +``` +source env/bin/activate +``` +Run `deactivate` to exit the virtual environment. + +You might need to install pip? +``` +sudo -H python -m ensurepip +``` +To run the gzip_classifier.py you need to install some python dependencies: + +``` +python -m pip install numpy scikit-learn pandas tqdm scikit-learn-extra imbalanced-learn plotext +``` + +To reproduce the entire experiment, install `deckard` from this folder as working directory with: + +``` +python -m pip install ../../ +``` +which will run the setup.py script in the root directory of this repository. + +Additionally, we are using some optuna features that are not necessarily available in whatever version of hydra you have installed. Instead, install it from source, much like you did for this repository: + +``` +git clone https://github.com/facebookresearch/hydra +cd hydra +python -m pip install . +``` +Additionally, you need to install the hydra-optuna-sweeper plugin by navigating to the `examples/gzip/hydra/plugins/hydra_optuna_sweeper` folder and installing it with: +``` +python -m pip install . +``` + +Now, we need to specify a default configuration to test before our grid search.
Return to this folder and run the parser, which will read the config files, accept command line overrides, and allow you to specify a default `dvc` `params.yaml` file that will contain the git-trackable defaults for your experiments. Run the parser with: +``` +python -m deckard.layers.parse --config_name default --config_folder conf +``` +Both of the displayed options are the default choices anyway, but you can change them to another config folder and file as you wish. This will create a `params.yaml` file in the current working directory using hydra's compose API. + +From here, we will let dvc manage our tasks, execution order, caching, and reproducibility. You can run the entire experiment with: + +``` +dvc repro +``` +which will read the `dvc.yaml` file, parsing any parameters specified in the `params.yaml` or any other specified file. + +It will then execute a "stage", which is a single shell command as well as "params", "deps", "outs", "metrics", and/or "plots", which track the parameters, dependencies, outputs, metric files, and/or plot files using DVC. + + You can specify an order of operations by requiring the output of an earlier stage to be a dependency of a later stage. + +To exploit the dvc file-tracking features, you can use dictionary keywords (e.g. files.X) when specifying a stage, regardless of usage within said stage. If you run the optimise script, it will overwrite files.name with the hash of the experiment config if and only if the name is set to default. Other entries: files.data_file, files.model_file, files.attack_file, will likewise be overwritten with the hash of the respective sub dictionary. + + Additionally, deckard will allow us to use the parameter tracking features of dvc as well as the configuration, launching, and optimization features of hydra by specifying a "stage", which will allow us to isolate pipeline stages from one another if so desired. See the `dvc.yaml` file for examples of this.
In this example, it's mostly used for testing the functionality of different features in isolation from each other, but could be used for any arbitrary division of an ML pipeline you so choose. For example, we could have separate stages for sampling, feature selection, training, and evaluation. This is just a matter of passing different subsets of the configuration dictionary to the run-time. The `deckard.layers.experiment` script will parse the entire params.yaml file, load or generate data, split it into train and test sets, fit a model, attack it (if so desired), make predictions, and score it according to the specified configs. diff --git a/examples/gzip/batchMixin.py b/examples/gzip/batchMixin.py new file mode 100644 index 00000000..5cc762b7 --- /dev/null +++ b/examples/gzip/batchMixin.py @@ -0,0 +1,255 @@ +from tqdm import tqdm +import logging +import numpy as np + + +from sklearn.datasets import make_classification +import random + +# from gzip_classifier import GzipSVC, GzipKNN, GzipLogisticRegressor +from sklearn.svm import SVC +from sklearn.model_selection import train_test_split +import plotext + +logger = logging.getLogger(__name__) + +train_scores = [] +test_scores = [] + + +class BatchedMixin: + def __init__( + self, + batch_size: int = 10, + max_batches: int = 100, + nb_epoch=1, + **kwargs, + ): + self.batch_size = kwargs.pop("m", batch_size) + self.max_batches = kwargs.pop("max_batches", max_batches) + nb_epoch = kwargs.pop("nb_epoch", nb_epoch) + if not nb_epoch >= 1: + nb_epoch = 1 + self.nb_epoch = nb_epoch + if "m" in kwargs: + logger.warning( + f"Parameter 'm' is being overwritten with batch_size={self.batch_size}.", + ) + kwargs["m"] = self.batch_size + super().__init__(**kwargs) + self.predict = self.batched_predict(self.predict) + if hasattr(self, "_find_best_samples"): + self._find_best_samples = self.batched_find_best_samples( + self._find_best_samples, + ) + if hasattr(self, "score"): + self.score = self.batched_score(self.score) +
self.fit = self.batched_fit(self.fit) + self.predict = self.batched_predict(self.predict) + if self.nb_epoch > 1: + self.fit = self.epoch_fit(self.fit) + # self.score = self.batched_score(self.score) + + def epoch_fit(self, fit_func): + def wrapper(*args, **kwargs): + X, y = args + for i in range(self.nb_epoch): + random.shuffle(X) + random.shuffle(y) + fit_func(X, y, **kwargs) + + return wrapper + + def batched_fit(self, fit_func): + def wrapper(*args, **kwargs): + X_train, y_train = args + n = len(X_train) + n_batches = n // self.batch_size + if n_batches > self.max_batches: + logger.warning( + f"Number of batches ({n_batches}) is greater than max_batches ({self.max_batches}). Using max_batches.", + ) + n_batches = self.max_batches + for i in tqdm( + range(n_batches), + desc="Fitting batches", + total=n_batches, + leave=False, + dynamic_ncols=True, + ): + start = i * self.batch_size + end = (i + 1) * self.batch_size + X_batch = X_train[start:end] + y_batch = y_train[start:end] + print( + f"Shape of X_batch is {X_batch.shape} and shape of y_batch is {y_batch.shape}", + ) + fit_func(X_batch, y_batch, **kwargs) + if self.nb_epoch > 1: + continue + train_score = self.score(X_batch, y_batch) + test_score = self.score(X_train, y_train) + print( + f"Batch {i+1} of {n_batches} - Train score: {np.mean(train_score)}; Test score: {np.mean(test_score)}", + ) + train_scores.append(train_score) + test_scores.append(test_score) + + return wrapper + + def batched_find_best_samples(self, func): + def wrapper(method, **kwargs): + if "X" in kwargs: + X = kwargs["X"] + assert "y" in kwargs, "y must be provided if X is provided" + y = kwargs["y"] + append = True + else: + X = self.X_ + y = self.y_ + append = False + n_jobs = kwargs.pop("n_jobs", -1) + n = len(X) + n_batches = n // self.batch_size + if n_batches > self.max_batches: + n_batches = self.max_batches + elif n_batches == 0: + n_batches = 1 + for i in range(n_batches): + if append is True: + new_X = X[i * self.batch_size : 
(i + 1) * self.batch_size] # noqa + new_y = y[i * self.batch_size : (i + 1) * self.batch_size] # noqa + indices = func(X=new_X, y=new_y, method=method, n_jobs=n_jobs) + # print("After finding best samples") + # print(f"Length of indices is {len(indices)}") + X = X[indices] + y = y[indices] + self.X_ = X + self.y_ = y + self.distance_matrix = self.distance_matrix + else: + indices = func(method=method, n_jobs=n_jobs) + return indices + + return wrapper + + def batched_predict(self, predict_func): + def wrapper(*args, **kwargs): + X_test = args[0] + n = len(X_test) + n_batches = n // self.batch_size + if n_batches > self.max_batches: + n_batches = self.max_batches + elif n_batches == 0: + n_batches = 1 + preds = [] + for i in tqdm( + range(n_batches), + desc="Predicting batches", + total=n_batches, + leave=False, + dynamic_ncols=True, + ): + start = i * self.batch_size + end = (i + 1) * self.batch_size + X_batch = X_test[start:end] + new_preds = predict_func(X_batch, **kwargs) + preds.append(new_preds) + return np.concatenate(preds) + + return wrapper + + def batched_score(self, score_func): + def wrapper(*args, **kwargs): + X_test, y_test = args + n = len(X_test) + n_batches = n // self.batch_size + if n_batches > self.max_batches: + n_batches = self.max_batches + elif n_batches == 0: + n_batches = 1 + scores = [] + for i in tqdm( + range(n_batches), + desc="Scoring batches", + total=n_batches, + leave=False, + dynamic_ncols=True, + ): + start = i * self.batch_size + end = (i + 1) * self.batch_size + X_batch = X_test[start:end] + y_batch = y_test[start:end] + score = score_func(X_batch, y_batch, **kwargs) + scores.append(score) + return scores + + return wrapper + + +def create_batched_class(cls, *args, **kwargs): + name = cls.__name__ + + class BatchedClass(cls, BatchedMixin): + def __init__(self, *args, **kwargs): + self.max_batches = kwargs.pop("max_batches", 100) + self.batch_size = kwargs.pop("batch_size", 10) + super().__init__(*args, **kwargs) + + 
batched_class = BatchedClass() + combined_name = f"Batched{name}" + batched_class.__name__ = combined_name + batched_class.__init__(*args, **kwargs) + return batched_class + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + big_X = [] + big_y = [] + for i in range(100): + X, y = make_classification( + n_samples=100, + n_features=20, + n_informative=19, + n_redundant=1, + n_classes=2, + random_state=42 + i, + ) + big_X.extend(X.tolist()) + big_y.extend(y.tolist()) + big_X = np.array(big_X) + big_y = np.array(big_y) + logger.info(f"Shape of big_X: {big_X.shape}") + i = 42 + X, y = make_classification( + n_samples=10000, + n_features=20, + n_informative=19, + n_redundant=1, + n_classes=2, + random_state=42 + i, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + ) + + class BatchedSVC(BatchedMixin, SVC): + pass + + clf = BatchedSVC(max_batches=100, batch_size=100, kernel="rbf") + clf.fit(X_train, y_train) + score = clf.score(X_test, y_test) + print(score) + input("Press enter to continue") + score = round(np.mean(score), 2) + std = round(np.std(score), 3) + logger.info(f"Final Score: {score}") + logger.info(f"Standard Deviation: {std}") + # if plotext_available is True: + plotext.scatter(train_scores, label="Train scores") + plotext.scatter(test_scores, label="Test scores") + plotext.plot() diff --git a/examples/gzip/conf/attack/.gitignore b/examples/gzip/conf/attack/.gitignore new file mode 100644 index 00000000..41ee9b5a --- /dev/null +++ b/examples/gzip/conf/attack/.gitignore @@ -0,0 +1 @@ +/best.yaml diff --git a/examples/gzip/conf/attack/attack_grid.yaml b/examples/gzip/conf/attack/attack_grid.yaml new file mode 100644 index 00000000..d7dc3050 --- /dev/null +++ b/examples/gzip/conf/attack/attack_grid.yaml @@ -0,0 +1,49 @@ +- attack.init.name: [art.attacks.evasion.FastGradientMethod] + attack.init.eps : [.01, .03, .3, .1] + attack.init.norm : ['inf', 1, 2] + attack.init.eps_step : 
[.001, .003, .01] + attack.init.batch_size : [100] + +- attack.init.name: [art.attacks.evasion.ProjectedGradientDescent] + attack.init.eps : [.01, .03, .3, .1] + attack.init.norm : ['inf', 1, 2] + attack.init.eps_step : [.001, .003, .01] + attack.init.batch_size : [100] + attack.init.max_iter : [10] + +- attack.init.name: [art.attacks.evasion.CarliniL0Method] + attack.init.batch_size : [100] + attack.init.max_iter : [10] + attack.init.confidence : [.1, .9, .99] + +- attack.init.name: [art.attacks.evasion.CarliniL2Method] + attack.init.batch_size : [100] + attack.init.max_iter : [10] + attack.init.confidence : [.1, .9, .99] + +- attack.init.name: [art.attacks.evasion.CarliniLInfMethod] + attack.init.max_iter : [10] + attack.init.confidence: [.1, .9, .99] + +- attack.init.name: [art.attacks.evasion.DeepFool] + attack.init.max_iter : [10] + attack.init.batch_size : [100] + attack.init.nb_grads : [10, 100, 1000] + +- attack.init.name: [art.attacks.evasion.HopSkipJump] + attack.init.max_iter : [10] + attack.init.max_eval : [10] + attack.init.init_eval : [10] + attack.init.norm : ['inf', 2] + +- attack.init.name: [art.attacks.evasion.PixelAttack] + attack.init.th : [.5, .9, .99] + attack.init.max_iter : [10] + +- attack.init.name: [art.attacks.evasion.ThresholdAttack] + attack.init.th : [.5, .9, .99] + attack.init.max_iter : [10] + +- attack.init.name: [art.attacks.evasion.AdversarialPatch] + attack.init.max_iter : [10] + attack.init.learning_rate : [.5, 5.0, 50.0] diff --git a/examples/gzip/conf/attack/default.yaml b/examples/gzip/conf/attack/default.yaml new file mode 100644 index 00000000..e0967ff7 --- /dev/null +++ b/examples/gzip/conf/attack/default.yaml @@ -0,0 +1,2 @@ +defaults: + - hsj diff --git a/examples/gzip/conf/attack/hsj.yaml b/examples/gzip/conf/attack/hsj.yaml new file mode 100644 index 00000000..9f4c7e02 --- /dev/null +++ b/examples/gzip/conf/attack/hsj.yaml @@ -0,0 +1,15 @@ +data: ${data} +model: ${model} +_target_ : deckard.base.attack.Attack +init: + 
name: art.attacks.evasion.HopSkipJump + model: ${model} + batch_size : 1024 + norm : 2 + max_iter : 10 + max_eval : 10 + init_eval : 10 + init_size : 10 + verbose : true +attack_size : 10 +method : evasion diff --git a/examples/gzip/conf/clean.yaml b/examples/gzip/conf/clean.yaml new file mode 100644 index 00000000..c5bc3dd5 --- /dev/null +++ b/examples/gzip/conf/clean.yaml @@ -0,0 +1,35 @@ +# params: + # control: + # data.sample.train_size: 100 + # defaults: + # model.init.m : -1 +# fillna: +# model.init.compressor : "None" +# model.init.metric : "ncd" +# model.init.method : "random" +# model.init.m : ${data.sample.random_state} +# model.init.precompute : "False" +replace: + model.init.metric: + jaro: "Jaro" + _winkler: "-Winkler" + levenshtein: "Levenshtein" + ncd: "NCD" + ratio: "Ratio" + seqRatio: "SeqRatio" + hamming: "Hamming" + gzip: "Gzip" + pkl: "Pickle" + bz2: "BZ2" + zstd: "Zstd" + lzma : "Lzma" + model_name: + GzipSVC : "k-SVC" + GzipLogisticRegressor : "k-Logistic" + GzipKNN : "k-KNN" + model.init.symmetric: + True: "Symmetric" + False: "Asymmetric" +drop_values: + accuracy : 0.00000000000 + predict_time : 1.00000000000 diff --git a/examples/gzip/conf/condense.yaml b/examples/gzip/conf/condense.yaml new file mode 100644 index 00000000..a64263ea --- /dev/null +++ b/examples/gzip/conf/condense.yaml @@ -0,0 +1,69 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: ??? + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : ??? +stage : train +direction : + - maximize + - minimize +optimizers: + - accuracy + - predict_time +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir : ${hydra.job.num} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + # group: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials : 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size : 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m : tag(log, interval(.01, .1)) + +model.init.sampling_method : medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/condense_knn.yaml b/examples/gzip/conf/condense_knn.yaml new file mode 100644 index 00000000..52bd92be --- /dev/null +++ b/examples/gzip/conf/condense_knn.yaml @@ -0,0 +1,72 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: gzip_knn + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : gzip_knn +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir : ${hydra.job.num} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k : 1,3,5,7,11 + +model.init.weights : uniform,distance + +model.init.algorithm : brute + model.init.symmetric : True,False + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 + model.init.m: tag(log, interval(.1, 1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/condense_logistic.yaml b/examples/gzip/conf/condense_logistic.yaml new file mode 100644 index 00000000..5a585b06 --- /dev/null +++ b/examples/gzip/conf/condense_logistic.yaml @@ -0,0 +1,75 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? 
+ - model: gzip_logistic + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : gzip_logistic +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir : ${hydra.job.id} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials : 1 + params: + +model.init.solver: saga + +model.init.penalty : l2,l1,l2,none + +model.init.tol : 1e-4,1e-3,1e-2 + +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept : True,False + +model.init.class_weight : balanced,None + model.init.symmetric : True,False + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 + model.init.m: tag(log, interval(.1, 1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: 
auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/condense_svc.yaml b/examples/gzip/conf/condense_svc.yaml new file mode 100644 index 00000000..478c9c97 --- /dev/null +++ b/examples/gzip/conf/condense_svc.yaml @@ -0,0 +1,74 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: gzip_svc + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : gzip_svc +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir : ${hydra.job.id} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : + - maximize + metric_names : + - accuracy + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ??? 
+ storage: sqlite:///optuna.db + n_jobs: 2 + n_trials : 2 + params: + +model.init.kernel : rbf,precomputed + +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma : scale,auto + +model.init.class_weight : balanced,null + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 + model.init.m: tag(log, interval(.1, 1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/condensed_plots.yaml b/examples/gzip/conf/condensed_plots.yaml new file mode 100644 index 00000000..268802a3 --- /dev/null +++ b/examples/gzip/conf/condensed_plots.yaml @@ -0,0 +1,61 @@ +line_plot: + - file : sampling_method_vs_accuracy.pdf + hue: model.init.sampling_method + title: #"Accuracy vs Sampling Method" + x : model.init.m + xlabel: Percentage of Samples per Class + y : accuracy + ylabel: Accuracy + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim : [0, 1] + y_scale : linear + legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + - file: sampling_method_vs_train_time.pdf + hue: model.init.sampling_method + title: #"Training Time vs Sampling Method" + x : model.init.m + xlabel: Percentage of Samples per Class + y : train_time + ylabel: Training Time (s) + y_scale : linear + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim : [0, 1] + legend: {"title": "Sampling 
Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + - file : sampling_method_vs_predict_time.pdf + hue: model.init.sampling_method + title: #"Prediction Time vs Sampling Method" + x : model.init.m + xlabel: Percentage of Samples per Class + y : predict_time + ylabel: Prediction Time (s) + y_scale : log + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim : [0, 1] + legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} diff --git a/examples/gzip/conf/data/ddos.yaml b/examples/gzip/conf/data/ddos.yaml new file mode 100644 index 00000000..ed201a5e --- /dev/null +++ b/examples/gzip/conf/data/ddos.yaml @@ -0,0 +1,12 @@ +_target_: deckard.base.data.Data +name: raw_data/ddos_undersampled_10000.csv +target: 'Label' +drop: + - 'Timestamp' # Drop the timestamp column + - 'Unnamed: 0' # Drop the index column +sample: + random_state : 0 + train_size : 100 + test_size : 100 + stratify: True + shuffle : True diff --git a/examples/gzip/conf/data/default.yaml b/examples/gzip/conf/data/default.yaml new file mode 100644 index 00000000..e4aa937b --- /dev/null +++ b/examples/gzip/conf/data/default.yaml @@ -0,0 +1,3 @@ +defaults: + - ddos + diff --git a/examples/gzip/conf/data/kdd_nsl.yaml b/examples/gzip/conf/data/kdd_nsl.yaml new file mode 100644 index 00000000..48bc817b --- /dev/null +++ b/examples/gzip/conf/data/kdd_nsl.yaml @@ -0,0 +1,11 @@ +_target_: deckard.base.data.Data +drop: + - id +sample: + _target_: deckard.base.data.SklearnDataSampler + random_state : 0 + stratify: True + train_size : 100 + test_size : 100 +name: raw_data/kdd_nsl_undersampled_5000.csv +target: label diff --git a/examples/gzip/conf/data/sms_spam.yaml b/examples/gzip/conf/data/sms_spam.yaml new file mode 100644 index 00000000..5741cfb0 --- /dev/null +++ b/examples/gzip/conf/data/sms_spam.yaml @@ -0,0 +1,9 @@ +_target_: deckard.base.data.Data 
+name: raw_data/sms-spam_undersampled_1450.csv +target: 'label' +sample: + random_state : 0 + train_size : 100 + test_size : 100 + stratify: True + shuffle : True diff --git a/examples/gzip/conf/data/truthseeker.yaml b/examples/gzip/conf/data/truthseeker.yaml new file mode 100644 index 00000000..baa78bb3 --- /dev/null +++ b/examples/gzip/conf/data/truthseeker.yaml @@ -0,0 +1,9 @@ +_target_: deckard.base.data.Data +name: raw_data/truthseeker_undersampled_8000.csv +target: 'BotScoreBinary' +sample: + random_state : 0 + train_size : 100 + test_size : 100 + stratify: True + shuffle : True diff --git a/examples/gzip/conf/default.yaml b/examples/gzip/conf/default.yaml new file mode 100644 index 00000000..a0a93718 --- /dev/null +++ b/examples/gzip/conf/default.yaml @@ -0,0 +1,53 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: kdd_nsl + - model: default + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : grid + - override hydra/launcher : joblib +dataset : kdd_nsl +model_name : gzip_knn +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${files.directory}/logs/${stage}/ + sweep: + dir: ${files.directory}/logs/ + subdir : ${hydra.sweeper.study_name}/${hydra.job.id} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${files.directory}/${hydra.sweep.dir}/${hydra.sweeper.study_name}.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${model_name}_${dataset} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials : 1 + params: + direction: ${direction} + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: -1 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: n_jobs*2 + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/files/default.yaml b/examples/gzip/conf/files/default.yaml new file mode 100644 index 00000000..5c28f314 --- /dev/null +++ b/examples/gzip/conf/files/default.yaml @@ -0,0 +1,11 @@ +_target_: deckard.base.files.FileConfig +reports: reports +data_dir: data +# data_file : ${data.sample.random_state}-${data.sample.train_size} +data_type: .csv +model_dir : model +directory: ${dataset} +score_dict_file: score_dict.json +params_file : params.yaml +predictions_file : predictions.json +name : default diff --git a/examples/gzip/conf/gzip_knn.yaml b/examples/gzip/conf/gzip_knn.yaml new file mode 100644 index 00000000..da8b7ca5 --- /dev/null +++ b/examples/gzip/conf/gzip_knn.yaml @@ -0,0 +1,70 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: gzip_knn + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : gzip_knn +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir : ${hydra.job.num} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k : 1,3,5,7,11 + +model.init.weights : uniform,distance + +model.init.algorithm : brute + model.init.symmetric : True,False + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/gzip_logistic.yaml b/examples/gzip/conf/gzip_logistic.yaml new file mode 100644 index 00000000..3636c201 --- /dev/null +++ b/examples/gzip/conf/gzip_logistic.yaml @@ -0,0 +1,73 @@ +defaults: + # - _target_ : deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: gzip_logistic + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? 
+model_name : gzip_logistic +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir : ${hydra.job.id} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : ${direction} + metric_names : ${optimizers} + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials : 1 + params: + +model.init.solver: saga + +model.init.penalty : l2,l1,l2,none + +model.init.tol : 1e-4,1e-3,1e-2 + +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept : True,False + +model.init.class_weight : balanced,None + model.init.symmetric : True,False + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/gzip_svc.yaml b/examples/gzip/conf/gzip_svc.yaml new file mode 100644 index 00000000..42212998 --- /dev/null +++ b/examples/gzip/conf/gzip_svc.yaml @@ -0,0 +1,73 @@ +defaults: + # - _target_ : 
deckard.base.experiment.Experiment + - _self_ + - data: ??? + - model: gzip_svc + - files: default + - scorers: default + - override hydra/sweeper : optuna + - override hydra/sweeper/sampler : tpe + - override hydra/launcher : joblib +dataset : ??? +model_name : gzip_svc +stage : train +direction : + - maximize +optimizers: + - accuracy +device_id : ${oc.env:DECKARD_DEVICE_ID, "cpu"} +hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir : ${hydra.job.id} + callbacks: + study_dump: + _target_ : database.OptunaStudyDumpCallback + storage : ${hydra.sweeper.storage} + study_name : ${hydra.sweeper.study_name} + directions : + - maximize + metric_names : + - accuracy + output_file : ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials : 2 + params: + +model.init.kernel : rbf,precomputed + +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma : scale,auto + +model.init.class_weight : balanced,null + model.init.symmetric : True,False + ++model.init.precompute : True + model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer : processes + verbose: 1 + timeout: null + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r diff --git a/examples/gzip/conf/model/.gitignore 
b/examples/gzip/conf/model/.gitignore new file mode 100644 index 00000000..3101d5ea --- /dev/null +++ b/examples/gzip/conf/model/.gitignore @@ -0,0 +1,12 @@ +/best_gzip_knn_kdd_nsl.yaml +/best_gzip_svc_kdd_nsl.yaml +/best_gzip_logistic_kdd_nsl.yaml +/best_gzip_knn_truthseeker.yaml +/best_gzip_svc_truthseeker.yaml +/best_gzip_logistic_truthseeker.yaml +/best_gzip_knn_ddos.yaml +/best_gzip_svc_ddos.yaml +/best_gzip_logistic_ddos.yaml +/best_gzip_knn_sms_spam.yaml +/best_gzip_svc_sms_spam.yaml +/best_gzip_logistic_sms_spam.yaml diff --git a/examples/gzip/conf/model/default.yaml b/examples/gzip/conf/model/default.yaml new file mode 100644 index 00000000..1aa36bb4 --- /dev/null +++ b/examples/gzip/conf/model/default.yaml @@ -0,0 +1,2 @@ +defaults: + - gzip_knn.yaml diff --git a/examples/gzip/conf/model/gzip_knn.yaml b/examples/gzip/conf/model/gzip_knn.yaml new file mode 100644 index 00000000..fc926aaf --- /dev/null +++ b/examples/gzip/conf/model/gzip_knn.yaml @@ -0,0 +1,11 @@ +data: ${data} +library : sklearn +init: + _target_: deckard.base.model.ModelInitializer + name : gzip_classifier.GzipKNN + k : 1 + m: -1 + metric : gzip + symmetric : false + distance_matrix : ${files.directory}/${files.model_dir}/${model.init.metric}/${data.sample.train_size}-${data.sample.test_size}/${data.sample.random_state}.npz +_target_: deckard.base.model.Model diff --git a/examples/gzip/conf/model/gzip_logistic.yaml b/examples/gzip/conf/model/gzip_logistic.yaml new file mode 100644 index 00000000..ea95121c --- /dev/null +++ b/examples/gzip/conf/model/gzip_logistic.yaml @@ -0,0 +1,10 @@ +data: ${data} +library : sklearn +init: + _target_: deckard.base.model.ModelInitializer + name : gzip_classifier.GzipLogisticRegressor + m: -1 + metric : gzip + symmetric : false + distance_matrix : ${files.directory}/${files.model_dir}/${model.init.metric}/${data.sample.train_size}-${data.sample.test_size}/${data.sample.random_state}.npz +_target_: deckard.base.model.Model diff --git 
a/examples/gzip/conf/model/gzip_svc.yaml b/examples/gzip/conf/model/gzip_svc.yaml new file mode 100644 index 00000000..58111a99 --- /dev/null +++ b/examples/gzip/conf/model/gzip_svc.yaml @@ -0,0 +1,10 @@ +data: ${data} +library : sklearn +init: + _target_: deckard.base.model.ModelInitializer + name : gzip_classifier.GzipSVC + m: -1 + distance_matrix : ${files.directory}/${files.model_dir}/${model.init.metric}/${data.sample.train_size}-${data.sample.test_size}/${data.sample.random_state}.npz + metric : gzip + symmetric : false +_target_: deckard.base.model.Model diff --git a/examples/gzip/conf/plots.yaml b/examples/gzip/conf/plots.yaml new file mode 100644 index 00000000..eac757c4 --- /dev/null +++ b/examples/gzip/conf/plots.yaml @@ -0,0 +1,169 @@ +line_plot: +- file: metric_vs_accuracy.pdf + hue: model.init.metric + title: #"Accuracy vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: metric_vs_train_time.pdf + hue: model.init.metric + title: #"Training Time vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: metric_vs_predict_time.pdf + hue: model.init.metric + title: #"Prediction Time vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction 
# Categorical plots. Each entry describes one figure: `x`, `y` and `hue` name
# the result columns to plot, `kind` selects the categorical plot type and
# `set` holds axis settings.
# NOTE(review): indentation was reconstructed; `rotation` is assumed to be an
# entry-level key in every entry (as it visibly is in models_vs_train_time)
# -- confirm against the plotting code.
cat_plot:
  - file: symmetric_vs_metric.pdf
    x : model.init.symmetric
    y : accuracy
    hue : model.init.metric
    errorbar: se
    kind : bar
    titles :
    xlabels : ""
    ylabels : Accuracy
    legend_title: "Metrics"
    hue_order:
      - Gzip
      - Pickle
      - BZ2
      - Zstd
      - Lzma
      - Levenshtein
      - Ratio
      - Hamming
      - Jaro
      - Jaro-Winkler
      - SeqRatio
    legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
    set:
      yscale: linear
      ylim: [0, 1]
  - file: symmetric_vs_metric_train_time.pdf
    x : model.init.symmetric
    y : train_time
    hue : model.init.metric
    errorbar: se
    kind : bar
    titles :
    xlabels : ""
    ylabels : Training Time (s)
    legend_title: "Metrics"
    hue_order:
      - Gzip
      - Pickle
      - BZ2
      - Zstd
      - Lzma
      - Levenshtein
      - Ratio
      - Hamming
      - Jaro
      - Jaro-Winkler
      - SeqRatio
    legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
    set:
      yscale: log
  - file: models_vs_accuracy.pdf
    x : model_name
    y : accuracy
    hue : data.sample.train_size
    errorbar: se
    kind : boxen
    titles :
    xlabels : Model
    ylabels : Accuracy
    legend_title: "Samples"

    legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
    set:
      yscale: linear
      ylim: [0, 1]
    rotation: 90
  - file: models_vs_train_time.pdf
    x : model_name
    # The plotted column must agree with the file name and the y-label:
    # this figure shows training time, not accuracy.
    y : train_time
    hue : data.sample.train_size
    errorbar: se
    kind : bar
    titles :
    xlabels : Model
    ylabels : Training Time (s)
    legend_title: "Samples"
    rotation: 90
    legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
    set:
      yscale: log
  - file: models_vs_predict_time.pdf
    x : model_name
    # The plotted column must agree with the file name and the y-label:
    # this figure shows prediction time, not accuracy.
    y : predict_time
    hue : data.sample.train_size
    errorbar: se
    kind : bar
    titles :
    xlabels : Model
    ylabels : Prediction Time (s)
    legend_title: "Samples"

    legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
    set:
      yscale: log
    rotation: 90
def undersample(df, target, n_samples=10000):
    """
    Undersamples the dataframe to balance the target column.

    Every distinct value of ``df[target]`` is requested at
    ``n_samples // n_classes`` rows, so the result holds at most ``n_samples``
    rows in total.

    :param df: dataframe containing the features and the ``target`` column.
    :param target: name of the label column to balance.
    :param n_samples: total number of rows to aim for across all classes.
    :return: a new dataframe with the resampled features followed by the
        resampled target column.
    """
    y = df[target]
    X = df.drop(target, axis=1)
    # Count once: the index of the counts holds the distinct class labels.
    class_counts = y.value_counts()
    per_class = n_samples // len(class_counts)
    sampling_strategy = {label: per_class for label in class_counts.index}
    # Fixed random_state keeps the selected subset reproducible between runs.
    rus = RandomUnderSampler(random_state=0, sampling_strategy=sampling_strategy)
    X_resampled, y_resampled = rus.fit_resample(X, y)
    return pd.concat([X_resampled, y_resampled], axis=1)


def undersample_datasets(datasets: list, targets: list, n_samples: list):
    """
    Undersamples the datasets to balance the target columns.

    For each CSV in ``datasets`` the matching entry of ``targets`` names the
    label column and the matching entry of ``n_samples`` the total row budget.
    The balanced frame is written next to the input file as
    ``<stem>_undersampled_<n>.csv``.

    :param datasets: paths of the CSV files to read.
    :param targets: label column name for each dataset.
    :param n_samples: total number of rows to keep for each dataset.
    :return: list of ``Path`` objects of the files written, in input order.
    :raises AssertionError: if the three lists do not have the same length.
    """
    assert len(datasets) == len(
        targets,
    ), "The number of datasets and targets must be the same"
    # Validate up front so a short list cannot fail half-way through the run
    # and a long one cannot be silently truncated.
    assert len(datasets) == len(
        n_samples,
    ), "The number of datasets and n_samples must be the same"
    written = []
    for dataset, target, n in zip(datasets, targets, n_samples):
        path = Path(dataset)
        print(f"Undersampling {path.as_posix()}")
        df = pd.read_csv(dataset)
        df = undersample(df, target, n)
        new_name = path.parent / f"{path.stem}_undersampled_{n}.csv"
        print(f"Renaming to {new_name}")
        new_name.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(new_name, index=False)
        print(f"Saved to {new_name}")
        written.append(new_name)
    return written
logger.info("Label counts for KDD NSL: {}".format(counts)) + df = pd.read_csv( + "https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/34e546e4813f154d11d4f13869b9e3481fc3e829/truthseeker.csv", + ) + X = df["tweet"] + label = "BotScoreBinary" + y = df[label] + df = pd.concat([X, y], axis=1) + df.to_csv("raw_data/truthseeker.csv", index=False) + # Find the number of entries for each label + counts = pd.DataFrame(df[label]).value_counts().values + labels = range(len(counts)) + if plot is True: + # Plot the counts + plt.simple_bar(labels, counts, title="Truthseeker Label Counts", width=50) + plt.show() + else: + logger.info("Label counts for Truthseeker: {}".format(counts)) + df = pd.read_csv( + "https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/c91944733b8f2b9a6ac0b8c8fab01ddcdf0898eb/sms-spam.csv", + ) + X = df["message"] + y = df["label"] + y = y.str.replace("ham", "0").replace("spam", "1") + df = pd.concat([X, y], axis=1) + df.to_csv("raw_data/sms-spam.csv", index=False) + # Find the number of entries for each label + counts = pd.DataFrame(df["label"]).value_counts().values + labels = range(len(counts)) + # Plot the counts + if plot is True: + plt.simple_bar(labels, counts, title="SMS Spam Label Counts", width=50) + plt.show() + else: + logger.info("Label counts for SMS Spam: {}".format(counts)) + df = pd.read_csv( + "https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/712b528dcd212d5a6d1767332f50161fc1cfe55c/ddos.csv", + ) + # Find the number of entries for each label + X = df.drop("Label", axis=1) + y = df["Label"] + y = y.str.replace("Benign", "0").replace("ddos", "1") + df = pd.concat([X, y], axis=1) + df.to_csv("raw_data/ddos.csv", index=False) + counts = pd.DataFrame(y).value_counts().values + labels = range(len(counts)) + # Plot the counts + if plot is True: + plt.simple_bar(labels, counts, title="DDoS Label Counts", width=50) + plt.show() + 
@dataclass
class OptunaStudyDumpCallback(Callback):
    """Hydra callback that dumps the trials of an Optuna study to a file.

    At the start of a multirun the configured metric names are attached to the
    study (when the installed Optuna supports it); at the end the trials
    dataframe is sorted and written to ``output_file`` as CSV or JSON.
    """

    def __init__(
        self,
        storage: str,
        study_name: str,
        metric_names: list,
        directions: list,
        output_file: str,
        seed=42,
    ):
        """
        :param storage: Optuna storage URL the study lives in.
        :param study_name: name of the study to load from ``storage``.
        :param metric_names: one name or a list/ListConfig of metric names.
        :param directions: one direction or a list/ListConfig of directions.
        :param output_file: destination path; suffix must be .csv or .json.
        :param seed: accepted for interface compatibility; not used here.
        """
        self.storage = storage
        self.study_name = study_name
        self.metric_names = self._as_list(metric_names)
        # Directions get their own attribute; they must never overwrite the
        # metric names, which are later used as dataframe column names.
        self.directions = self._as_list(directions)
        self.output_file = output_file
        super().__init__()

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a plain list (ListConfig resolved, scalar wrapped)."""
        if isinstance(value, ListConfig):
            return OmegaConf.to_container(value, resolve=True)
        if isinstance(value, list):
            return value
        return [value]

    def _load_study(self):
        """Load the configured study, asserting first that it exists in storage."""
        study_names = list(optuna.get_all_study_names(self.storage))
        assert (
            self.study_name in study_names
        ), f"Study {self.study_name} not found in {study_names}"
        # Keyword form works whether or not load_study accepts positionals.
        return optuna.load_study(study_name=self.study_name, storage=self.storage)

    def on_multirun_start(self, config: DictConfig = None, **kwargs) -> None:
        """Attach the metric names to the study before the sweep runs.

        ``config`` is optional so the hook can also be invoked outside Hydra
        (see ``multirun_call``); it is not read here.

        :raises AssertionError: if the study is not present in the storage.
        """
        study = self._load_study()
        if hasattr(study, "set_metric_names"):
            study.set_metric_names(self.metric_names)
        else:
            print("Cannot set metric names")

    def on_multirun_end(self, *args, **kwargs) -> None:
        """Write the study's trials, sorted by metric (descending), to ``output_file``.

        :raises AssertionError: if the study is missing or the file was not written.
        :raises ValueError: if ``output_file`` has a suffix other than .csv/.json.
        """
        study = self._load_study()
        df = study.trials_dataframe()
        # NOTE(review): these column names presumably rely on the metric names
        # having been set on the study -- confirm against the Optuna version.
        if len(self.metric_names) == 1:
            metric_names = [f"value_{metric}" for metric in self.metric_names]
            df = df.sort_values(metric_names[0], ascending=False)
        else:
            metric_names = [f"values_{metric}" for metric in self.metric_names]
            df = df.sort_values(metric_names, ascending=False)
        suffix = Path(self.output_file).suffix
        if suffix in [".csv"]:
            df.to_csv(self.output_file, index=False)
        elif suffix in [".json"]:
            df.to_json(self.output_file, orient="records")
        else:
            raise ValueError(f"Unknown file type {suffix}")

        assert Path(self.output_file).exists(), f"File {self.output_file} not found"


def multirun_call(args):
    """Run the dump callback once from parsed command-line arguments.

    :param args: namespace providing ``storage``, ``study_name``,
        ``metric_names``, ``directions`` and ``output_file``.
    """
    storage = args.storage
    study_name = args.study_name
    metric_names = (
        args.metric_names
        if isinstance(args.metric_names, list)
        else [args.metric_names]
    )
    directions = (
        args.directions if isinstance(args.directions, list) else [args.directions]
    )
    output_file = args.output_file

    callback = OptunaStudyDumpCallback(
        storage,
        study_name,
        metric_names,
        directions,
        output_file,
    )
    # No Hydra config exists on the command-line path, so none is passed.
    callback.on_multirun_start()
    callback.on_multirun_end()
+optuna_callback_parser.add_argument( + "--metric_names", + type=str, + nargs="+", + default=metric_names, +) +optuna_callback_parser.add_argument( + "--directions", + type=str, + nargs="+", + default=directions, +) +optuna_callback_parser.add_argument("--output_file", type=str, default=output_file) + +if __name__ == "__main__": + args = optuna_callback_parser.parse_args() + multirun_call(args) diff --git a/examples/gzip/dvc.lock b/examples/gzip/dvc.lock new file mode 100644 index 00000000..a02a4b1d --- /dev/null +++ b/examples/gzip/dvc.lock @@ -0,0 +1,19910 @@ +schema: '2.0' +stages: + train: + cmd: python -m deckard.layers.experiment train + deps: + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + - path: raw_data/ + hash: md5 + md5: 33d46673e0631bef98be9e8991ed1ed1.dir + size: 50328647 + nfiles: 8 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: 
deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/default/predictions.json + hash: md5 + md5: 986d2f0abe9b96253b196a222a550609 + size: 702 + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + test_each_method@knn-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/knn + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + 
symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/method/knn + hash: md5 + md5: f902bdd8882aa06bba0d1fef19c4a313.dir + size: 11613 + nfiles: 4 + - path: kdd_nsl/reports/train/knn/score_dict.json + hash: md5 + md5: 4e7f0750779df5202e5dec6228f94f99 + size: 490 + test_each_method@knn-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + hydra.run.dir=truthseeker/logs/method/knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 
100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/knn + hash: md5 + md5: 5a52da2681ff444c53a1623722c2d431.dir + size: 11642 + nfiles: 4 + - path: truthseeker/reports/train/knn/score_dict.json + hash: md5 + md5: f09f746efa5c7a56f4dd1a3e20a7ab6b + size: 485 + test_each_method@svc-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/svc + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: 
raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/method/svc + hash: md5 + md5: 433b30d37ba64e71527ac2d837b44fa2.dir + size: 11612 + nfiles: 4 + - path: kdd_nsl/reports/train/svc/score_dict.json + hash: md5 + md5: f41538adb6ffa9182ea126c85c353abf + size: 489 + test_each_method@svc-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + hydra.run.dir=truthseeker/logs/method/svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: 
deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/svc + hash: md5 + md5: bc37655235ef0d2919a62c85456d379c.dir + size: 11645 + nfiles: 4 + - path: truthseeker/reports/train/svc/score_dict.json + hash: md5 + md5: 97f1fed3ee2887773ca9a50eeeb5b1ed + size: 488 + test_each_method@medoid-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/medoid + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + 
predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/method/medoid + hash: md5 + md5: 5b972c1f6a8c4ebff94a088e2be12b28.dir + size: 11661 + nfiles: 4 + - path: kdd_nsl/reports/train/medoid/score_dict.json + hash: md5 + md5: 10a0913632dea0d6717263ba1854b1e2 + size: 484 + test_each_method@medoid-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=medoid + hydra.run.dir=truthseeker/logs/method/medoid ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + 
_target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/medoid + hash: md5 + md5: 7b6fef8487e5b8dec0f76f4b4fc59ccb.dir + size: 10226 + nfiles: 4 + - path: truthseeker/reports/train/medoid/score_dict.json + hash: md5 + md5: 8cebb3ee0098d2ee2bb4130e346e8e0f + size: 282 + test_each_method@sum-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/sum + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + 
device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/method/sum + hash: md5 + md5: 41cd7632a1d85e7380d14b0e8eccc819.dir + size: 11607 + nfiles: 4 + - path: kdd_nsl/reports/train/sum/score_dict.json + hash: md5 + md5: 2a97e468ea2e9071e1f7d5bdb1e7495b + size: 484 + test_each_method@sum-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=sum + hydra.run.dir=truthseeker/logs/method/sum ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: 
deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/sum + hash: md5 + md5: e7f9741f777d98f3d3416264b9f3e6b2.dir + size: 10164 + nfiles: 4 + - path: truthseeker/reports/train/sum/score_dict.json + hash: md5 + md5: d49a3cbdeb348bbf9ad3b59e9e8e0e32 + size: 283 + test_each_method@random-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/random + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: 
raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/method/random + hash: md5 + md5: 723e8c93428a09edb21943a20fca5c3c.dir + size: 11639 + nfiles: 4 + - path: kdd_nsl/reports/train/random/score_dict.json + hash: md5 + md5: ed402e68904e8888b8ba6b0bebf6fa05 + size: 488 + test_each_method@random-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + hydra.run.dir=truthseeker/logs/method/random ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: f8a4019adc566855c2a704a0311ff7c4 + size: 489 + - 
path: params.yaml + hash: md5 + md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 + size: 1467 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/random + hash: md5 + md5: f785fe50b4007a169c37e6e9cb856268.dir + size: 11670 + nfiles: 4 + - path: truthseeker/reports/train/random/score_dict.json + hash: md5 + md5: 8bfb4b2efa55e9944cec7331401762f9 + size: 485 + prepare_distance_matrices@0-10-kdd_nsl: + cmd: python -m deckard.layers.optimise files.name=0-10 stage=train data=kdd_nsl + dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=10 dataset=kdd_nsl + files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier 
model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-10.npz + model.init.method=random model.init.m=100 ++raise_exception=True + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 3332d80113acf55f8e69e46aea82a1cc + size: 412 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: + https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: + https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: 
kdd_nsl/model/gzip_classifier/gzip/0-100.npz + k: 1 + m: -1 + method: + name: gzip_classifier.GzipClassifier + library: sklearn + model_name: gzip_classifier + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/model/gzip_classifier/gzip/0-10.npz + hash: md5 + md5: 1b745ff8dbc88f247f3245d9efd6de7e + size: 208 + - path: kdd_nsl/reports/train/0-10/score_dict.json + hash: md5 + md5: cae521db2dcda14d0d3ed880c26adf62 + size: 233 + prepare_distance_matrices@0-100-kdd_nsl: + cmd: python -m deckard.layers.optimise files.name=0-100 stage=train data=kdd_nsl + dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=100 dataset=kdd_nsl + files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-100.npz + model.init.method=random model.init.m=100 ++raise_exception=True + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 222b4b55b1b16639ce30218bf60c1f32 + size: 412 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: + https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + 
name: default + params_file: params.yaml + predictions_file: predictions.json + data: + cmd: python data_prep.py + deps: + - path: data_prep.py + hash: md5 + md5: 18244c921ed2d7cbf25b8362b3ca33aa + size: 5146 + outs: + - path: raw_data/ + hash: md5 + md5: 33d46673e0631bef98be9e8991ed1ed1.dir + size: 50328647 + nfiles: 8 + test_symmetric_methods@true-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_true files.directory=kdd_nsl data=kdd_nsl + dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=true hydra.run.dir=kdd_nsl/logs/symmetric/true + model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: 
deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/model/gzip_knn/None/symmetric_true.npz + hash: md5 + md5: 1b745ff8dbc88f247f3245d9efd6de7e + size: 208 + - path: kdd_nsl/reports/train/symmetric_true/score_dict.json + hash: md5 + md5: bb10a010ac3f8790cdbe4310288efc63 + size: 432 + test_symmetric_methods@true-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_true files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=gzip_knn model.init.symmetric=true hydra.run.dir=truthseeker/logs/symmetric/true + model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_true.npz + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: 
deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/model/gzip_knn/None/symmetric_true.npz + hash: md5 + md5: f71a2727e708fdfb7867a6983f3aa8cf + size: 223 + - path: 
truthseeker/reports/train/symmetric_true/score_dict.json + hash: md5 + md5: 6d7a4eb01733e4e2fda1c40b5562646c + size: 434 + test_symmetric_methods@true-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_true files.directory=sms_spam data=sms_spam + dataset=sms_spam model_name=gzip_knn model.init.symmetric=true hydra.run.dir=sms_spam/logs/symmetric/true + model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + 
encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/model/gzip_knn/None/symmetric_true.npz + hash: md5 + md5: 1b745ff8dbc88f247f3245d9efd6de7e + size: 208 + - path: sms_spam/reports/train/symmetric_true/score_dict.json + hash: md5 + md5: 0b8d690ffca7173942d490a2f0cbeec4 + size: 432 + test_symmetric_methods@true-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_true files.directory=ddos data=ddos dataset=ddos + model_name=gzip_knn model.init.symmetric=true hydra.run.dir=ddos/logs/symmetric/true + model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: 
sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/model/gzip_knn/None/symmetric_true.npz + hash: md5 + md5: 1b745ff8dbc88f247f3245d9efd6de7e + size: 208 + - path: ddos/reports/train/symmetric_true/score_dict.json + hash: md5 + md5: 2c12176f8bf7355f284e059b2527cf44 + size: 418 + test_symmetric_methods@false-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_false 
files.directory=kdd_nsl data=kdd_nsl + dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=false hydra.run.dir=kdd_nsl/logs/symmetric/false + model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + 
compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/model/gzip_knn/None/symmetric_false.npz + hash: md5 + md5: 9a9fcf9ba5dbc34eb2ca1f203088fc47 + size: 740 + - path: kdd_nsl/reports/train/symmetric_false/score_dict.json + hash: md5 + md5: 8ae56e642565330a37e731472a6c2d76 + size: 429 + test_symmetric_methods@false-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_false files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=gzip_knn model.init.symmetric=false hydra.run.dir=truthseeker/logs/symmetric/false + model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_false.npz + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: 
deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/model/gzip_knn/None/symmetric_false.npz + hash: md5 + md5: b02cc76ddfb10d1e0e63e0f6e05cdaae + size: 1791 + - path: truthseeker/reports/train/symmetric_false/score_dict.json + hash: md5 + md5: 4ef36cb0b198d778dc8e0e6ff282d778 + size: 433 + test_symmetric_methods@false-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_false files.directory=sms_spam data=sms_spam + dataset=sms_spam model_name=gzip_knn model.init.symmetric=false hydra.run.dir=sms_spam/logs/symmetric/false + 
model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_false.npz + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + 
model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/model/gzip_knn/None/symmetric_false.npz + hash: md5 + md5: ac71e5af3607731b783a490caf81c37f + size: 694 + - path: sms_spam/reports/train/symmetric_false/score_dict.json + hash: md5 + md5: 66d92f0ed630b08fbddb1a9c07f13981 + size: 432 + test_symmetric_methods@false-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random + model.init.m=10 files.name=symmetric_false files.directory=ddos data=ddos dataset=ddos + model_name=gzip_knn model.init.symmetric=false hydra.run.dir=ddos/logs/symmetric/false + model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + - path: raw_data/ + hash: md5 + md5: d897229dd67895957a0a4330ce95b09a.dir + size: 42279674 + nfiles: 4 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: 
reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/model/gzip_knn/None/symmetric_false.npz + hash: md5 + md5: 0d3f08d9c6cb8ddc6d3e68f8208c9bc5 + size: 821 + - path: ddos/reports/train/symmetric_false/score_dict.json + hash: md5 + md5: ba81be29d56943d6d573597c93ba8081 + size: 412 + test_each_compressor@gzip-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=kdd_nsl + data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/gzip.npz + model.init.compressor=gzip model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/gzip + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + 
size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/gzip/score_dict.json + hash: md5 + md5: b3f76b5e7fe68821d9336c4968888b08 
+ size: 431 + test_each_compressor@gzip-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=truthseeker + data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random + model.init.distance_matrix=truthseeker/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10 + hydra.run.dir=truthseeker/logs/compressor/gzip ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: 
label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/gzip/score_dict.json + hash: md5 + md5: df9b8a302dfb3b85b5c3c7623d86383e + size: 434 + test_each_compressor@gzip-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=sms_spam + data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random + model.init.distance_matrix=sms_spam/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10 + hydra.run.dir=sms_spam/logs/compressor/gzip ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: 
score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/gzip/score_dict.json + hash: md5 + md5: 39a6710366ed557259ef981fc0b45a6a + size: 432 + test_each_compressor@gzip-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=ddos + data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/gzip.npz + model.init.compressor=gzip model.init.m=10 hydra.run.dir=ddos/logs/compressor/gzip + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + 
random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/gzip/score_dict.json + hash: md5 + md5: 1919cb29d6196b8dd14c01458e341a6b + size: 414 + test_each_compressor@zstd-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=kdd_nsl + data=kdd_nsl 
dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/zstd.npz + model.init.compressor=zstd model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/zstd + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: 
false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/zstd/score_dict.json + hash: md5 + md5: 868509c201cbb0093818357427896da7 + size: 416 + test_each_compressor@zstd-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=truthseeker + data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random + model.init.distance_matrix=truthseeker/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10 + hydra.run.dir=truthseeker/logs/compressor/zstd ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: 
sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/zstd/score_dict.json + hash: md5 + md5: 89546ca3a3510fd73671341863c69cb9 + size: 434 + test_each_compressor@zstd-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=sms_spam + data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random + model.init.distance_matrix=sms_spam/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10 + hydra.run.dir=sms_spam/logs/compressor/zstd ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: 
sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/zstd/score_dict.json + hash: md5 + md5: e5a10b0013b032b22dd6cc596a7810bb + size: 429 + test_each_compressor@zstd-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=ddos + data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/zstd.npz + model.init.compressor=zstd 
model.init.m=10 hydra.run.dir=ddos/logs/compressor/zstd + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig 
+ direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/zstd/score_dict.json + hash: md5 + md5: 898feb287504053c9de9c1a809733c4b + size: 432 + test_each_compressor@pkl-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=kdd_nsl + data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/pkl.npz + model.init.compressor=pkl model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/pkl + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + 
train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/pkl/score_dict.json + hash: md5 + md5: 3e01c227095014ab9f4665ea98e7f3b5 + size: 430 + test_each_compressor@pkl-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=truthseeker + data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random + model.init.distance_matrix=truthseeker/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10 + hydra.run.dir=truthseeker/logs/compressor/pkl ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl 
+ device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/pkl/score_dict.json + hash: md5 + md5: 85d4598fcbe6077a465a9edeadd3843a + size: 430 + test_each_compressor@pkl-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=sms_spam + data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random + model.init.distance_matrix=sms_spam/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10 + hydra.run.dir=sms_spam/logs/compressor/pkl ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 
14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: 
sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/pkl/score_dict.json + hash: md5 + md5: a4667414e7721ee7ed489df1e412e0b0 + size: 431 + test_each_compressor@pkl-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=ddos + data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/pkl.npz + model.init.compressor=pkl model.init.m=10 hydra.run.dir=ddos/logs/compressor/pkl + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + 
name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/pkl/score_dict.json + hash: md5 + md5: 340261dd836239b846699c4c687b3042 + size: 432 + test_each_compressor@bz2-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=kdd_nsl + data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/bz2.npz + model.init.compressor=bz2 model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/bz2 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + 
predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/bz2/score_dict.json + hash: md5 + md5: 05fd4b45d252c648d4afb4ba3ffc05e4 + size: 430 + test_each_compressor@bz2-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=truthseeker + data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random + model.init.distance_matrix=truthseeker/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10 + hydra.run.dir=truthseeker/logs/compressor/bz2 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: 
deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/bz2/score_dict.json + hash: md5 + md5: 1b3094ea4075cb1b5b8cd3f74bf0c3dc + size: 432 + test_each_compressor@bz2-sms_spam: + 
cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=sms_spam + data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random + model.init.distance_matrix=sms_spam/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10 + hydra.run.dir=sms_spam/logs/compressor/bz2 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: 
gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/bz2/score_dict.json + hash: md5 + md5: 45303b7d052fb91e65c9f3ad97999b6a + size: 431 + test_each_compressor@bz2-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=ddos + data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/bz2.npz + model.init.compressor=bz2 model.init.m=10 hydra.run.dir=ddos/logs/compressor/bz2 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: 
deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/bz2/score_dict.json + hash: md5 + md5: fdfa470b2053f561dea2e047423b54cd + size: 431 + test_each_precompute@True-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random + model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 hydra.run.dir=kdd_nsl/logs/precompute/True + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + 
train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/precompute_True/score_dict.json + hash: md5 + md5: f5c9a9ce41a0680f1e18874d6f21bd25 + size: 433 + test_each_precompute@True-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True + files.directory=truthseeker data=truthseeker dataset=truthseeker 
model_name=gzip_knn + model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/True.npz + +model.init.precompute=True model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/True + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false 
+ library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/precompute_True/score_dict.json + hash: md5 + md5: 76dcdbf7dc1fb63ce7b978c2f6bef8a2 + size: 435 + test_each_precompute@True-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/True.npz + +model.init.precompute=True model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/True + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + 
library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/precompute_True/score_dict.json + hash: md5 + md5: fe9a23520513840fe4a90fb8413e62da + size: 432 + test_each_precompute@True-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random + model.init.distance_matrix=ddos/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 hydra.run.dir=ddos/logs/precompute/True + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + 
name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/precompute_True/score_dict.json + hash: md5 + md5: 0d72c99dc99df13629a383ca9745712e + size: 429 + test_each_precompute@False-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random + 
model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/False.npz +model.init.precompute=False model.init.m=10 hydra.run.dir=kdd_nsl/logs/precompute/False + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + 
_target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/precompute_False/score_dict.json + hash: md5 + md5: d225ea006c02f56f552431e223ef6576 + size: 429 + test_each_precompute@False-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/False.npz + +model.init.precompute=False model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/False + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: 
deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/precompute_False/score_dict.json + hash: md5 + md5: e8094fb43b55432d298346a0a291ac71 + size: 431 + test_each_precompute@False-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/False.npz + +model.init.precompute=False model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/False + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: 
sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/precompute_False/score_dict.json + hash: md5 + md5: 0f3b13aba3cc817f2327769f36b54939 + size: 432 + test_each_precompute@False-ddos: + cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random + model.init.distance_matrix=ddos/model/gzip_knn/None/False.npz 
+model.init.precompute=False model.init.m=10 hydra.run.dir=ddos/logs/precompute/False + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: 
deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/precompute_False/score_dict.json + hash: md5 + md5: 9cc47f921a908ad81e486980d134f453 + size: 418 + test_each_metric@levenshtein-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/levenshtein.npz + hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/levenshtein ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + 
random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/levenshtein/score_dict.json + hash: md5 + md5: 4f517489b794c13bbbbb477bd7b14ea8 + size: 248 + test_each_metric@levenshtein-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1 + hydra.run.dir=truthseeker/logs/metric/levenshtein ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + 
with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/levenshtein/score_dict.json + hash: md5 + md5: 2f0fa43167cde43c2d8c901ee6bc360d + size: 250 + test_each_metric@levenshtein-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1 + hydra.run.dir=sms_spam/logs/metric/levenshtein ++raise_exception=True ' + deps: + - 
path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: 
deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/levenshtein/score_dict.json + hash: md5 + md5: bb8456e5a2457e841619d5750922bd0c + size: 246 + test_each_metric@levenshtein-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/levenshtein.npz + hydra.sweeper.n_jobs=1 hydra.run.dir=ddos/logs/metric/levenshtein ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + 
name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/levenshtein/score_dict.json + hash: md5 + md5: 1956a0651292bf6919a103e46c0c5906 + size: 248 + test_each_metric@ratio-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/ratio.npz + hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/ratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + 
model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/ratio/score_dict.json + hash: md5 + md5: 841058c500666af10a3a84fd7769e53d + size: 244 + test_each_metric@ratio-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=truthseeker/logs/metric/ratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + 
params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/ratio/score_dict.json + hash: md5 + md5: 5cbc24c928a073a9459428d4e1984ba1 + size: 426 + 
test_each_metric@ratio-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=sms_spam/logs/metric/ratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer 
+ compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/ratio/score_dict.json + hash: md5 + md5: b8ea7bf8de9af2250f1a2c84695be1f9 + size: 425 + test_each_metric@ratio-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/ratio.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/ratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: 
deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/ratio/score_dict.json + hash: md5 + md5: 5f9750a5729db8f4912f50a8610fc48c + size: 429 + test_each_metric@hamming-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/hamming.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/hamming ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + 
encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/hamming/score_dict.json + hash: md5 + md5: ed699605a76c4116a461994f139da237 + size: 429 + test_each_metric@hamming-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + 
model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=truthseeker/logs/metric/hamming ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: 
deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/hamming/score_dict.json + hash: md5 + md5: 8a3f87734f208a61bc27114729fd4fd6 + size: 432 + test_each_metric@hamming-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=sms_spam/logs/metric/hamming ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: 
deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/hamming/score_dict.json + hash: md5 + md5: 0c0988090568dc526d0137ff7e38ca6a + size: 428 + test_each_metric@hamming-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/hamming.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/hamming ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: 
true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/hamming/score_dict.json + hash: md5 + md5: 949f7ea27f2521fbbb2b05ec3a111346 + size: 428 + test_each_metric@jaro-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 
14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: 
sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/jaro/score_dict.json + hash: md5 + md5: 3bd4e5c89097070d439c3f13359ff369 + size: 428 + test_each_metric@jaro-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=truthseeker/logs/metric/jaro ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + 
preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/jaro/score_dict.json + hash: md5 + md5: b86d70f18ea7ee85132f4d8407058d60 + size: 429 + test_each_metric@jaro-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=sms_spam/logs/metric/jaro ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + 
predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/jaro/score_dict.json + hash: md5 + md5: b7550248d10852d10a16610f707ea50f + size: 429 + test_each_metric@jaro-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: 
deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/jaro/score_dict.json + hash: md5 + md5: e7987cb2d248f7eaa20a842bbcacc442 + size: 430 + test_each_metric@jaro_winkler-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train 
model.init.metric=jaro_winkler files.name=jaro_winkler + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro_winkler.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro_winkler ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + 
metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/jaro_winkler/score_dict.json + hash: md5 + md5: a44e09663d05f8330352712ccfd72f17 + size: 428 + test_each_metric@jaro_winkler-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=truthseeker/logs/metric/jaro_winkler ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: 
deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/jaro_winkler/score_dict.json + hash: md5 + md5: 2a80298804f36bc7af477e11ff9f6679 + size: 428 + test_each_metric@jaro_winkler-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=sms_spam/logs/metric/jaro_winkler ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + 
test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/reports/train/jaro_winkler/score_dict.json + hash: md5 + md5: 8b7d0f92e14d74042fb8cd907e3a8274 + size: 430 + test_each_metric@jaro_winkler-ddos: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler + files.directory=ddos data=ddos 
dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro_winkler.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro_winkler ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: 
gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/reports/train/jaro_winkler/score_dict.json + hash: md5 + md5: aa4130c79130ddbaaebaa35a1cae7d91 + size: 426 + test_each_metric@seqratio-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/seqratio.npz + hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/seqratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: 
raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/seqratio/score_dict.json + hash: md5 + md5: 9075115a02136aaa59bd87074589ce42 + size: 430 + test_each_metric@seqratio-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8 + hydra.run.dir=truthseeker/logs/metric/seqratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: 
-1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/reports/train/seqratio/score_dict.json + hash: md5 + md5: ac2bdff9261ce4c9e511294dd69b19f8 + size: 434 + test_each_metric@seqratio-sms_spam: + cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8 + 
hydra.run.dir=sms_spam/logs/metric/seqratio ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 14173762472fe294a1d3228b4ee83d4b + size: 431 + - path: params.yaml + hash: md5 + md5: 4999b48c21cb63a45801003d03576594 + size: 2082 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + art: + _target_: deckard.base.model.art_pipeline.ArtPipeline + initialize: + nb_classes: 2 + library: sklearn + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 10 + train_size: 10 + sklearn_pipeline: + encoder: + handle_unknown: use_encoded_value + name: sklearn.preprocessing.OrdinalEncoder + unknown_value: -1 + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: true + with_std: true + target: label + init: + _target_: deckard.base.model.ModelInitializer + compressor: gzip + distance_matrix: + k: 1 + m: -1 + method: + metric: ncd + test_each_method@ddos-random: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3 + data.sample.train_size=100 files.name=random files.directory=ddos data=ddos + 
dataset=ddos model_name=random hydra.run.dir=ddos/logs/method/random ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/random + hash: md5 + md5: 3bfcc27fd44bf9333be7081f3fceb94c.dir + size: 8340 + nfiles: 4 + - path: ddos/reports/train/random/score_dict.json + hash: md5 + md5: 218449c8e2b7425707008d01e751eee4 + size: 281 + test_each_method@ddos-medoid: + cmd: 'python -m deckard.layers.optimise stage=train 
+model.init.sampling_method=medoid model.init.m=3 + data.sample.train_size=100 files.name=medoid files.directory=ddos data=ddos + dataset=ddos model_name=medoid hydra.run.dir=ddos/logs/method/medoid ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/medoid + hash: md5 + md5: cab03f71d3883157c103a207662f0f01.dir + size: 8377 + nfiles: 4 + - path: ddos/reports/train/medoid/score_dict.json + hash: md5 + md5: 
eb281dc186936044bcf39edf3b5c2a97 + size: 283 + test_each_method@ddos-sum: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3 + data.sample.train_size=100 files.name=sum files.directory=ddos data=ddos dataset=ddos + model_name=sum hydra.run.dir=ddos/logs/method/sum ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/sum + hash: md5 + md5: 
1acd35c26f1f01c1d97695be4df4be9f.dir + size: 8320 + nfiles: 4 + - path: ddos/reports/train/sum/score_dict.json + hash: md5 + md5: d8ee90602dcf3e5e3d1541fd051d8c25 + size: 283 + test_each_method@ddos-svc: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3 + data.sample.train_size=100 files.name=svc files.directory=ddos data=ddos dataset=ddos + model_name=svc hydra.run.dir=ddos/logs/method/svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: 
minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/svc + hash: md5 + md5: ff1e2d4db8fbd074fae27c28e6d7efab.dir + size: 8317 + nfiles: 4 + - path: ddos/reports/train/svc/score_dict.json + hash: md5 + md5: 02086eaaafb2de9549a587e0cac8d44f + size: 280 + test_each_method@ddos-condensed: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=condensed model.init.m=1 + files.name=condensed files.directory=ddos data=ddos dataset=ddos model_name=condensed + hydra.run.dir=ddos/logs/method/condensed ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + 
direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/condensed + hash: md5 + md5: 5dfc9ebfe1c6f3e496814c86a05a5329.dir + size: 10117 + nfiles: 4 + - path: ddos/reports/train/condensed/score_dict.json + hash: md5 + md5: 56bcddf54558d9cdd1a7587878aceffa + size: 284 + test_each_method@ddos-hardness: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3 + data.sample.train_size=100 files.name=hardness files.directory=ddos data=ddos + dataset=ddos model_name=hardness hydra.run.dir=ddos/logs/method/hardness ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: 
deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/hardness + hash: md5 + md5: 92679e897538c5e98e89f11ca456f483.dir + size: 8413 + nfiles: 4 + - path: ddos/reports/train/hardness/score_dict.json + hash: md5 + md5: 24a77200255cec8b4ec9f1877188fdda + size: 281 + test_each_method@ddos-nearmiss: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3 + data.sample.train_size=100 files.name=nearmiss files.directory=ddos data=ddos + dataset=ddos model_name=nearmiss hydra.run.dir=ddos/logs/method/nearmiss ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + 
symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/nearmiss + hash: md5 + md5: 84fc6455a5c576fa04c36919c33ae8fd.dir + size: 8416 + nfiles: 4 + - path: ddos/reports/train/nearmiss/score_dict.json + hash: md5 + md5: b4602181657a738a97631883018e221a + size: 284 + test_each_method@truthseeker-svc: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3 + data.sample.train_size=100 files.name=svc files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=svc hydra.run.dir=truthseeker/logs/method/svc + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + 
m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/svc + hash: md5 + md5: 7f9ad95f5b5a7d8ea8a41d09560bca7e.dir + size: 10252 + nfiles: 4 + - path: truthseeker/reports/train/svc/score_dict.json + hash: md5 + md5: dca27d752d8d9db2b52a61d9e0d9bebf + size: 283 + test_each_method@truthseeker-medoid: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=3 + data.sample.train_size=100 files.name=medoid files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=medoid hydra.run.dir=truthseeker/logs/method/medoid + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + 
target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/medoid + hash: md5 + md5: 57b1e2e154ae8653331898992d0d7f7c.dir + size: 10316 + nfiles: 4 + - path: truthseeker/reports/train/medoid/score_dict.json + hash: md5 + md5: a728020aeb632257e52cc9b13337870e + size: 284 + test_each_method@truthseeker-sum: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3 + data.sample.train_size=100 files.name=sum files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=sum hydra.run.dir=truthseeker/logs/method/sum + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + 
random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/sum + hash: md5 + md5: b8934f0660e9e6043e5a7117d2e3d462.dir + size: 10252 + nfiles: 4 + - path: truthseeker/reports/train/sum/score_dict.json + hash: md5 + md5: 0a4117f35aab6ec4b41ac526f8715aa2 + size: 283 + test_each_method@truthseeker-random: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3 + data.sample.train_size=100 files.name=random files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=random hydra.run.dir=truthseeker/logs/method/random + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: 
raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/random + hash: md5 + md5: a77f4e67f85e529063b18617cda5525a.dir + size: 10289 + nfiles: 4 + - path: truthseeker/reports/train/random/score_dict.json + hash: md5 + md5: 08f3cc499d61caaa4ab912af1a2ff558 + size: 283 + test_each_method@truthseeker-nearmiss: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3 + data.sample.train_size=100 files.name=nearmiss files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=nearmiss hydra.run.dir=truthseeker/logs/method/nearmiss + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + 
model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/nearmiss + hash: md5 + md5: 6ea3f0a574d7abd052e3ee5466356e13.dir + size: 10359 + nfiles: 4 + - path: truthseeker/reports/train/nearmiss/score_dict.json + hash: md5 + md5: f03918d65cac7f21e210a14be8ee1373 + size: 285 + test_each_method@truthseeker-hardness: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3 + data.sample.train_size=100 files.name=hardness files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=hardness hydra.run.dir=truthseeker/logs/method/hardness + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: 
params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/hardness + hash: md5 + md5: c5ea09925ae34a0fee42f1ec06d88090.dir + size: 10355 + nfiles: 4 + - path: truthseeker/reports/train/hardness/score_dict.json + hash: md5 + md5: 87bdbb0cafd4462b87035af79efc81c5 + size: 281 + test_each_method@truthseeker-knn: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3 + data.sample.train_size=100 files.name=knn files.directory=truthseeker data=truthseeker + dataset=truthseeker model_name=knn hydra.run.dir=truthseeker/logs/method/knn + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 064e5bb42979e36c917c538b2a7bc0cc + size: 489 + - path: params.yaml + hash: md5 + md5: 8e937140db56a135e97c05461c573520 + size: 1345 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + 
directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/method/knn + hash: md5 + md5: 5c5fe8f17151816b01d863f51db3d01a.dir + size: 10254 + nfiles: 4 + - path: truthseeker/reports/train/knn/score_dict.json + hash: md5 + md5: 4157a5deabda43d207a543b9f038b5af + size: 285 + test_each_method@ddos-knn: + cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3 + data.sample.train_size=100 files.name=knn files.directory=ddos data=ddos dataset=ddos + model_name=knn hydra.run.dir=ddos/logs/method/knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: 
deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/method/knn + hash: md5 + md5: 8d73125fea91a47efc49ba2b4a68e1fe.dir + size: 8319 + nfiles: 4 + - path: ddos/reports/train/knn/score_dict.json + hash: md5 + md5: fb77e1c8e53bac0e077d2140f1abc6d6 + size: 282 + condense@sms_spam-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.test_size=100 model_name=gzip_logistic model=gzip_logistic hydra.sweeper.study_name=condense_gzip_logistic_sms_spam + hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=sms_spam/logs/condense/gzip_logistic/ + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/study.csv + ++data.sample.train_size='int(interval(30, 1000))' ++data.sample.random_state='int(interval(10000, + 20000))' ++data.sample.stratify=True model.init.m='tag(log, interval(.1, 1))' + +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn files.directory=sms_spam + 
files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=32 --config-name + gzip_logistic --multirun + deps: + - path: conf/model/best_gzip_logistic_sms_spam.yaml + hash: md5 + md5: 026fca7fe5d7bb75c4a3ae245f86a2c2 + size: 332 + - path: sms_spam/logs/method/ + hash: md5 + md5: e8e327bbd5859a6c1c362fd482435727.dir + size: 69377 + nfiles: 24 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: int(interval(20, 1000)) + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.1, 1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/gzip_logistic/ + hash: md5 + md5: 9496098bd1497b6c46124e40e665ee74.dir + size: 14280 + nfiles: 5 + - path: sms_spam/reports/condense/gzip_logistic/ + hash: md5 + md5: c7e2a43c1dc170c3d593825f57ad0e9b.dir + size: 2707 + nfiles: 3 + condense@truthseeker-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.test_size=100 model_name=gzip_svc 
model=gzip_svc hydra.sweeper.study_name=condense_gzip_svc_truthseeker + hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=truthseeker/logs/condense/gzip_svc/ + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/study.csv ++data.sample.train_size='int(interval(30, + 1000))' ++data.sample.random_state='int(interval(10000, 20000))' ++data.sample.stratify=True + model.init.m='tag(log, interval(.1, 1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=truthseeker files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=32 + --config-name gzip_svc --multirun + deps: + - path: conf/model/best_gzip_svc_truthseeker.yaml + hash: md5 + md5: 97d9d5857744b1cc077513ac5a659f62 + size: 302 + - path: truthseeker/logs/method/ + hash: md5 + md5: 6f6693db2bb9520dc7956f0d0c003e23.dir + size: 116543 + nfiles: 44 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? 
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: int(interval(20, 1000)) + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.1, 1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/gzip_svc/ + hash: md5 + md5: bd7cbae34fd6feecf60a49cb537b0f80.dir + size: 13751 + nfiles: 5 + - path: truthseeker/reports/condense/gzip_svc/ + hash: md5 + md5: a24584cdc3464b86b6ff88b90dc62e5e.dir + size: 2701 + nfiles: 3 + condense@sms_spam-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.test_size=100 model_name=gzip_svc model=best_gzip_svc_sms_spam hydra.sweeper.study_name=condense_gzip_svc_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/gzip_svc/ + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=sms_spam files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_svc_sms_spam.yaml + hash: md5 + md5: 771cd8e3b1368f0fbb30e518002db80f + size: 317 + - path: sms_spam/logs/method/ + hash: md5 + md5: e8e327bbd5859a6c1c362fd482435727.dir + size: 69377 + nfiles: 24 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/gzip_svc/ + hash: md5 + md5: c6ef4ecf2bec03894b2f2018cffc0888.dir + size: 1597147 + nfiles: 513 + - path: sms_spam/reports/condense/gzip_svc/ + hash: md5 + md5: aff4ca5c41e7043fe0d36b4a669ad6a7.dir + size: 344414 + nfiles: 381 + condense@ddos-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100 + model_name=gzip_svc model=best_gzip_svc_ddos hydra.sweeper.study_name=condense_gzip_svc_ddos + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_svc/ + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=ddos files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16 + ++raise_exception=True --config-name condense --multirun 
+ deps: + - path: conf/model/best_gzip_svc_ddos.yaml + hash: md5 + md5: f2ec5b2ff8103b93ca61a5b86888a3e6 + size: 305 + - path: ddos/logs/method/ + hash: md5 + md5: 7128c67930147170f54fb89880528199.dir + size: 120518 + nfiles: 48 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/gzip_svc/ + hash: md5 + md5: 98f11cc76f9f370871bfb325ec4186e4.dir + size: 1589126 + nfiles: 513 + - path: ddos/reports/condense/gzip_svc/ + hash: md5 + md5: 87ca8778bbdb8363a1e237019c87ebf5.dir + size: 345583 + nfiles: 384 + condense@sms_spam-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_sms_spam hydra.sweeper.study_name=condense_gzip_knn_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 
hydra.sweep.dir=sms_spam/logs/condense/gzip_knn/ + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=sms_spam files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_knn_sms_spam.yaml + hash: md5 + md5: 430e2be20ddaa39808a6739627a98d77 + size: 259 + - path: sms_spam/logs/method/ + hash: md5 + md5: e8e327bbd5859a6c1c362fd482435727.dir + size: 69377 + nfiles: 24 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? 
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/gzip_knn/ + hash: md5 + md5: a45625dcc1d1cc1f1e20d19440e1cdf1.dir + size: 1559584 + nfiles: 513 + - path: sms_spam/reports/condense/gzip_knn/ + hash: md5 + md5: 0ac87faa8d16d77b4e7d5a96cfdde177.dir + size: 335094 + nfiles: 384 + compile@sms_spam-gzip_knn: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_knn --results_file + sms_spam/reports/gzip_knn.csv + deps: + - path: sms_spam/reports/gzip_knn/ + hash: md5 + md5: 89e3b68400367dee648064784adb9796.dir + size: 1499301 + nfiles: 1337 + outs: + - path: sms_spam/reports/gzip_knn.csv + hash: md5 + md5: ee7ee47f5ee27acca9e58b9249ecb954 + size: 695526 + compile@truthseeker-gzip_knn: + cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_knn --results_file + truthseeker/reports/gzip_knn.csv + deps: + - path: truthseeker/reports/gzip_knn/ + hash: md5 + md5: e5702237f62021b85240717035b53d81.dir + size: 1537318 + nfiles: 1325 + outs: + - path: truthseeker/reports/gzip_knn.csv + hash: md5 + md5: 183afe36078f60e3e478f3813b1b52a7 + size: 711959 + compile@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_knn --results_file + kdd_nsl/reports/gzip_knn.csv + deps: + - path: kdd_nsl/reports/gzip_knn/ + hash: md5 + md5: 
4dfe630ff7f6f036220f2b9aa5b3c6b1.dir + size: 4225577 + nfiles: 3608 + outs: + - path: kdd_nsl/reports/gzip_knn.csv + hash: md5 + md5: 17f27e4404093a5b50a74ca0af24e4db + size: 1964725 + compile@truthseeker-gzip_svc: + cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_svc --results_file + truthseeker/reports/gzip_svc.csv + deps: + - path: truthseeker/reports/gzip_svc/ + hash: md5 + md5: e6e273bb143c7a8949d5be4acca87eb9.dir + size: 1536370 + nfiles: 1725 + outs: + - path: truthseeker/reports/gzip_svc.csv + hash: md5 + md5: 746aae81f4af3c8ce4c8c7e3c3e866b1 + size: 870818 + compile@truthseeker-gzip_logistic: + cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_logistic --results_file + truthseeker/reports/gzip_logistic.csv + deps: + - path: truthseeker/reports/gzip_logistic/ + hash: md5 + md5: 5074027dccab644424973514ae7c8922.dir + size: 2225784 + nfiles: 1473 + outs: + - path: truthseeker/reports/gzip_logistic.csv + hash: md5 + md5: ed858c429ea35f3dac4eca9c52e036ce + size: 786129 + compile@ddos-gzip_logistic: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_logistic --results_file + ddos/reports/gzip_logistic.csv + deps: + - path: ddos/reports/gzip_logistic/ + hash: md5 + md5: 6ce8a2aa8cc08ccde4467403dec1a124.dir + size: 6278656 + nfiles: 4845 + outs: + - path: ddos/reports/gzip_logistic.csv + hash: md5 + md5: 7ff452295887d9c84250c7375b7ea58a + size: 2606734 + compile@ddos-gzip_knn: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_knn --results_file + ddos/reports/gzip_knn.csv + deps: + - path: ddos/reports/gzip_knn/ + hash: md5 + md5: ce89d46c7a34959f9d39a3d1e6ad8911.dir + size: 5724814 + nfiles: 5690 + outs: + - path: ddos/reports/gzip_knn.csv + hash: md5 + md5: fe28ae14c5cc37ee8eb5e705c3610da8 + size: 2899113 + compile@kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_logistic --results_file + 
kdd_nsl/reports/gzip_logistic.csv + deps: + - path: kdd_nsl/reports/gzip_logistic/ + hash: md5 + md5: bca1b51ebae4e3ef166f9424a0f8c1ff.dir + size: 4923952 + nfiles: 3945 + outs: + - path: kdd_nsl/reports/gzip_logistic.csv + hash: md5 + md5: 07859f070e6b9246456e860d63ab4438 + size: 2149350 + compile@kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_svc --results_file + kdd_nsl/reports/gzip_svc.csv + deps: + - path: kdd_nsl/reports/gzip_svc/ + hash: md5 + md5: 907ec439b02a0d2b3ba36d54e250ff89.dir + size: 4798455 + nfiles: 4393 + outs: + - path: kdd_nsl/reports/gzip_svc.csv + hash: md5 + md5: b25b5925936e935b62cdc6bd5b96d8d3 + size: 2257942 + compile@sms_spam-gzip_logistic: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_logistic --results_file + sms_spam/reports/gzip_logistic.csv + deps: + - path: sms_spam/reports/gzip_logistic/ + hash: md5 + md5: c70a60ca7e7e433d1cbd21bfddd26320.dir + size: 2212768 + nfiles: 1438 + outs: + - path: sms_spam/reports/gzip_logistic.csv + hash: md5 + md5: 34643e6fbb37caef6b6f9054cb1b5203 + size: 754980 + compile@ddos-gzip_svc: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_svc --results_file + ddos/reports/gzip_svc.csv + deps: + - path: ddos/reports/gzip_svc/ + hash: md5 + md5: 3b3fdb3e3d2321e8ee5dc36311626231.dir + size: 6101649 + nfiles: 5283 + outs: + - path: ddos/reports/gzip_svc.csv + hash: md5 + md5: 7bd491b47bf7d5f373cb825e9e3d0c4c + size: 2689051 + compile@sms_spam-gzip_svc: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_svc --results_file + sms_spam/reports/gzip_svc.csv + deps: + - path: sms_spam/reports/gzip_svc/ + hash: md5 + md5: 52af2b025a2aafa3e4a78db0bf221f59.dir + size: 2173475 + nfiles: 1536 + outs: + - path: sms_spam/reports/gzip_svc.csv + hash: md5 + md5: 12c2eec80495a5fb326dbed7c4cfe382 + size: 758618 + clean@truthseeker-gzip_svc: + cmd: python -m deckard.layers.clean_data -i 
truthseeker/reports/gzip_svc.csv + -o truthseeker/plots/clean/gzip_svc.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/gzip_svc.csv + hash: md5 + md5: 746aae81f4af3c8ce4c8c7e3c3e866b1 + size: 870818 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/gzip_svc.csv + hash: md5 + md5: cdb96b7ba00dc0bf6b4c8db38311447b + size: 679004 + clean@kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_svc.csv -o kdd_nsl/plots/clean/gzip_svc.csv + -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/gzip_svc.csv + hash: md5 + md5: b25b5925936e935b62cdc6bd5b96d8d3 + size: 2257942 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: kdd_nsl/plots/clean/gzip_svc.csv + hash: md5 + md5: a359fb46b83265dec352e0af17f19cb2 + size: 1771361 + clean@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_knn.csv -o kdd_nsl/plots/clean/gzip_knn.csv + -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/gzip_knn.csv + hash: md5 + md5: 17f27e4404093a5b50a74ca0af24e4db + size: 1964725 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + 
pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: kdd_nsl/plots/clean/gzip_knn.csv + hash: md5 + md5: 686b0f04494630491244a6ead99949b7 + size: 996268 + clean@ddos-gzip_knn: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_knn.csv -o ddos/plots/clean/gzip_knn.csv + -c conf/clean.yaml + deps: + - path: ddos/reports/gzip_knn.csv + hash: md5 + md5: fe28ae14c5cc37ee8eb5e705c3610da8 + size: 2899113 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/gzip_knn.csv + hash: md5 + md5: ad6773d0af82535d3c525f8bf405bbfe + size: 1919757 + clean@ddos-gzip_svc: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_svc.csv -o ddos/plots/clean/gzip_svc.csv + -c conf/clean.yaml + deps: + - path: ddos/reports/gzip_svc.csv + hash: md5 + md5: 7bd491b47bf7d5f373cb825e9e3d0c4c + size: 2689051 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/gzip_svc.csv + hash: md5 + md5: 45515bad8f1a4167a7a64d0a3d62464e + size: 1842449 + clean@kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_logistic.csv + -o 
kdd_nsl/plots/clean/gzip_logistic.csv -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/gzip_logistic.csv + hash: md5 + md5: 07859f070e6b9246456e860d63ab4438 + size: 2149350 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: kdd_nsl/plots/clean/gzip_logistic.csv + hash: md5 + md5: 82d8bddbe4db8eb6835d00931af7fc12 + size: 1456814 + clean@truthseeker-gzip_knn: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_knn.csv + -o truthseeker/plots/clean/gzip_knn.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/gzip_knn.csv + hash: md5 + md5: 183afe36078f60e3e478f3813b1b52a7 + size: 711959 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/gzip_knn.csv + hash: md5 + md5: dbbbb4c6ab13f540b1b4d9ee23d4a91a + size: 354842 + clean@ddos-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_logistic.csv -o + ddos/plots/clean/gzip_logistic.csv -c conf/clean.yaml + deps: + - path: ddos/reports/gzip_logistic.csv + hash: md5 + md5: 7ff452295887d9c84250c7375b7ea58a + size: 2606734 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: 
Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/gzip_logistic.csv + hash: md5 + md5: a7d5cf7362711724ae19bba3becf66d2 + size: 1523208 + clean@sms_spam-gzip_knn: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_knn.csv -o + sms_spam/plots/clean/gzip_knn.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/gzip_knn.csv + hash: md5 + md5: ee7ee47f5ee27acca9e58b9249ecb954 + size: 695526 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/gzip_knn.csv + hash: md5 + md5: 020bbec4f2594935bd33efdcdf90eba7 + size: 358497 + clean@sms_spam-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_logistic.csv + -o sms_spam/plots/clean/gzip_logistic.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/gzip_logistic.csv + hash: md5 + md5: 34643e6fbb37caef6b6f9054cb1b5203 + size: 754980 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/gzip_logistic.csv + hash: md5 + md5: d9a1be37cfb498a7d87c116db6f553e2 + size: 497702 + clean@sms_spam-gzip_svc: + cmd: python -m deckard.layers.clean_data -i 
sms_spam/reports/gzip_svc.csv -o + sms_spam/plots/clean/gzip_svc.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/gzip_svc.csv + hash: md5 + md5: 12c2eec80495a5fb326dbed7c4cfe382 + size: 758618 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/gzip_svc.csv + hash: md5 + md5: 4455964d2014f4705b4ea3191cef40b2 + size: 588874 + clean@truthseeker-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_logistic.csv + -o truthseeker/plots/clean/gzip_logistic.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/gzip_logistic.csv + hash: md5 + md5: 276fcd9d025d60418d6a92db6bee859e + size: 748894 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/gzip_logistic.csv + hash: md5 + md5: 82450f3b94f517f586b35ed85b494add + size: 417258 + copy@sms_spam: + cmd: 'rm -rf ~/Gzip-KNN/figs/sms_spam/ && mkdir -p ~/Gzip-KNN/figs/sms_spam/ && + cp -r sms_spam/plots/* ~/Gzip-KNN/figs/sms_spam/ ' + deps: + - path: sms_spam/plots/ + hash: md5 + md5: b4562b1ad06e680bf0247d4e8dab85c1.dir + size: 10160120 + nfiles: 19 + copy@truthseeker: + cmd: 'rm -rf ~/Gzip-KNN/figs/truthseeker/ && mkdir -p ~/Gzip-KNN/figs/truthseeker/ + && cp -r truthseeker/plots/* ~/Gzip-KNN/figs/truthseeker/ ' + deps: 
+ - path: truthseeker/plots/ + hash: md5 + md5: 47a062972487c796e962fa241d4bf108.dir + size: 8761443 + nfiles: 18 + copy@kdd_nsl: + cmd: 'rm -rf ~/Gzip-KNN/figs/kdd_nsl/ && mkdir -p ~/Gzip-KNN/figs/kdd_nsl/ && + cp -r kdd_nsl/plots/* ~/Gzip-KNN/figs/kdd_nsl/ ' + deps: + - path: kdd_nsl/plots/ + hash: md5 + md5: 526bfd7a3ffd1b1cee332632d79a96f8.dir + size: 13281984 + nfiles: 18 + copy@ddos: + cmd: 'rm -rf ~/Gzip-KNN/figs/ddos/ && mkdir -p ~/Gzip-KNN/figs/ddos/ && cp -r + ddos/plots/* ~/Gzip-KNN/figs/ddos/ ' + deps: + - path: ddos/plots/ + hash: md5 + md5: 22ac4455d4f24b7a0624f5d670f81e24.dir + size: 15551940 + nfiles: 19 + condense@truthseeker-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_truthseeker + hydra.sweeper.study_name=condense_gzip_knn_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/study.csv + model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=truthseeker files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_knn_truthseeker.yaml + hash: md5 + md5: 79baf4709c4a5f2535059ef8d1b6a082 + size: 258 + - path: truthseeker/logs/method/ + hash: md5 + md5: 6f6693db2bb9520dc7956f0d0c003e23.dir + size: 116543 + nfiles: 44 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/gzip_knn/ + hash: md5 + md5: 029aa9a618d0edd127756b0b724a1742.dir + size: 1568426 + nfiles: 513 + - path: truthseeker/reports/condense/gzip_knn/ + hash: md5 + md5: ef4ee3a0a4c954cea9b4f557a216e421.dir + size: 353591 + nfiles: 374 + plot@ddos-gzip_knn: + cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/clean_gzip_knn.csv -c + conf/plots.yaml + deps: + - path: ddos/plots/clean_gzip_knn.csv + hash: md5 + md5: c730af75faf35ba958b15b2da82b25be + size: 451405 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + 
bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: train_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: predict_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training 
Samples + y: train_time + ylabel: Training Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + outs: + - path: ddos/plots/metric_vs_accuracy.pdf + hash: md5 + md5: b8279045dcf3a1fc574578e991427e73 + size: 23629 + - path: ddos/plots/metric_vs_predict_time.pdf + hash: md5 + md5: 1512c0c363753afc28a3c093cc8e252c + size: 22784 + - path: ddos/plots/metric_vs_train_time.pdf + hash: md5 + md5: dd17a922e53b59b2d9b2d91c1237bb54 + size: 22329 + - path: ddos/plots/models_vs_accuracy.pdf + hash: md5 + md5: bbbc08a7161735b6174984688003809f + size: 13970 + - path: ddos/plots/models_vs_predict_time.pdf + hash: md5 + md5: 9b6acd61045df87af51626be2bdff7ab + size: 15507 + - path: ddos/plots/models_vs_train_time.pdf + hash: md5 + md5: 6a2303b531dfc78f20d9bf3dc62d8d42 + size: 16118 + - path: ddos/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 9cd54391a16400664710c9f0589a4d5f + size: 22044 + - path: ddos/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 0397f39f681300638b6fcc7c2d4e3bda + size: 21616 + plot@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/clean_gzip_knn.csv -c + conf/plots.yaml + deps: + - path: kdd_nsl/plots/clean_gzip_knn.csv + hash: md5 + md5: 1c001f5a7008b439ee4c7946998cbe25 + size: 1002255 + params: 
+ conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: train_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: predict_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle 
+ - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + outs: + - path: kdd_nsl/plots/metric_vs_accuracy.pdf + hash: md5 + md5: a146ab8c45d548ecd6c285a40c5d49e7 + size: 23228 + - path: kdd_nsl/plots/metric_vs_predict_time.pdf + hash: md5 + md5: 59f7befb701cf34c5bf62a78206d7867 + size: 22642 + - path: kdd_nsl/plots/metric_vs_train_time.pdf + hash: md5 + md5: 938036a897293cbf7dc0b4caa19a5596 + size: 22182 + - path: kdd_nsl/plots/models_vs_accuracy.pdf + hash: md5 + md5: 0dad2f21fc6049c3a24972a35514ee71 + size: 15035 + - path: kdd_nsl/plots/models_vs_predict_time.pdf + hash: md5 + md5: 4361ffb492bff25d3cde95fcdb941ced + size: 16578 + - path: kdd_nsl/plots/models_vs_train_time.pdf + hash: md5 + md5: 416681afbf2e0e87dcc7dfe97f0835fc + size: 16239 + - path: kdd_nsl/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 05a28fb9adea7b847f396fdd96c37d02 + size: 22208 + - path: 
kdd_nsl/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 0a0a9daf98ab6efe98cb31b69cba2c65 + size: 21578 + plot@truthseeker-gzip_knn: + cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/clean_gzip_knn.csv -c + conf/plots.yaml + deps: + - path: truthseeker/plots/clean_gzip_knn.csv + hash: md5 + md5: ff0162ac672b57d59126b965580901d9 + size: 620009 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: train_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: predict_time + hue: dataset + errorbar: se 
+ kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + outs: + - path: truthseeker/plots/metric_vs_accuracy.pdf + hash: md5 + md5: 3cef9a04adf0d3378d4627c1a8b097a7 + size: 23348 + - path: truthseeker/plots/metric_vs_predict_time.pdf + hash: md5 + md5: a4a5f2426ffaf289e124fb09235e374b + size: 22838 + - path: truthseeker/plots/metric_vs_train_time.pdf + hash: md5 + md5: cda8914da9fabcfb40ea1eb0943e28d3 + size: 22333 + - path: truthseeker/plots/models_vs_accuracy.pdf 
+ hash: md5 + md5: 7ef865e460d2652c873cfe333e7a308d + size: 15215 + - path: truthseeker/plots/models_vs_predict_time.pdf + hash: md5 + md5: eb57bd848d231a47615e311dbd1102b1 + size: 17930 + - path: truthseeker/plots/models_vs_train_time.pdf + hash: md5 + md5: e7bc6589ed86e8f5c3fbb5a747c652fe + size: 17739 + - path: truthseeker/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 0e7c326bef4c0d835f810c67172b2698 + size: 22057 + - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: ce21956e382cc48f0a71ef7ccfd79751 + size: 21593 + plot@sms_spam-gzip_knn: + cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/clean_gzip_knn.csv -c + conf/plots.yaml + deps: + - path: sms_spam/plots/clean_gzip_knn.csv + hash: md5 + md5: 13a5803849f7dfdefe18ba16b0a5010f + size: 448070 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: 
linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: train_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: predict_time + hue: dataset + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + 
loc: center left + outs: + - path: sms_spam/plots/metric_vs_accuracy.pdf + hash: md5 + md5: 507715814c07145dbb140b2b6714973b + size: 23499 + - path: sms_spam/plots/metric_vs_predict_time.pdf + hash: md5 + md5: 97ec5498625837c79fc02850afba09f1 + size: 22606 + - path: sms_spam/plots/metric_vs_train_time.pdf + hash: md5 + md5: f4c9d0805ea5d0ac8e1a2210ee788d47 + size: 22104 + - path: sms_spam/plots/models_vs_accuracy.pdf + hash: md5 + md5: 2eb9ea23cba4e5b734565b7aacdcf43c + size: 14146 + - path: sms_spam/plots/models_vs_predict_time.pdf + hash: md5 + md5: b518bf6b070e7916ad71febd3d3face6 + size: 15523 + - path: sms_spam/plots/models_vs_train_time.pdf + hash: md5 + md5: 617f20892ba643f5c47077af63ae727f + size: 14895 + - path: sms_spam/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 40aa8607331327c4f667fda367defb5f + size: 22033 + - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 775c0bdfc7d9524f1e63b8879ddefccd + size: 21590 + merge@truthseeker: + cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/gzip_knn.csv + --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder + truthseeker/plots --output_file merged.csv + deps: + - path: truthseeker/plots/clean/gzip_knn.csv + hash: md5 + md5: 1f46e4b3afd875ee11939b23bd1c0852 + size: 560551 + - path: truthseeker/plots/clean/gzip_logistic.csv + hash: md5 + md5: 82450f3b94f517f586b35ed85b494add + size: 417258 + - path: truthseeker/plots/clean/gzip_svc.csv + hash: md5 + md5: cdb96b7ba00dc0bf6b4c8db38311447b + size: 679004 + outs: + - path: truthseeker/plots/merged.csv + hash: md5 + md5: a9b4f71f4d7eccde5a901730969b0bb1 + size: 1711555 + merge@sms_spam: + cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/gzip_knn.csv + --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder + sms_spam/plots --output_file merged.csv + deps: + - path: sms_spam/plots/clean/gzip_knn.csv + hash: md5 + md5: 020bbec4f2594935bd33efdcdf90eba7 + 
size: 358497 + - path: sms_spam/plots/clean/gzip_logistic.csv + hash: md5 + md5: d9a1be37cfb498a7d87c116db6f553e2 + size: 497702 + - path: sms_spam/plots/clean/gzip_svc.csv + hash: md5 + md5: 4455964d2014f4705b4ea3191cef40b2 + size: 588874 + outs: + - path: sms_spam/plots/merged.csv + hash: md5 + md5: 3e3e63943b3d62dddc79e554cb691405 + size: 1492939 + merge@ddos: + cmd: python merge.py --big_dir ddos/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file + clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder ddos/plots --output_file + merged.csv + deps: + - path: ddos/plots/clean/gzip_knn.csv + hash: md5 + md5: ad6773d0af82535d3c525f8bf405bbfe + size: 1919757 + - path: ddos/plots/clean/gzip_logistic.csv + hash: md5 + md5: a7d5cf7362711724ae19bba3becf66d2 + size: 1523208 + - path: ddos/plots/clean/gzip_svc.csv + hash: md5 + md5: 45515bad8f1a4167a7a64d0a3d62464e + size: 1842449 + outs: + - path: ddos/plots/merged.csv + hash: md5 + md5: 2fd123789b3c749a653aa9c142d23858 + size: 5465498 + merge@kdd_nsl: + cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file + clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder kdd_nsl/plots --output_file + merged.csv + deps: + - path: kdd_nsl/plots/clean/gzip_knn.csv + hash: md5 + md5: 686b0f04494630491244a6ead99949b7 + size: 996268 + - path: kdd_nsl/plots/clean/gzip_logistic.csv + hash: md5 + md5: 82d8bddbe4db8eb6835d00931af7fc12 + size: 1456814 + - path: kdd_nsl/plots/clean/gzip_svc.csv + hash: md5 + md5: a359fb46b83265dec352e0af17f19cb2 + size: 1771361 + outs: + - path: kdd_nsl/plots/merged.csv + hash: md5 + md5: 7817c0dd6f149eb072f4a5c787fa9655 + size: 4361588 + plot@kdd_nsl: + cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/merged.csv -c + conf/plots.yaml + deps: + - path: kdd_nsl/plots/merged.csv + hash: md5 + md5: 7817c0dd6f149eb072f4a5c787fa9655 + size: 4361588 + params: + conf/plots.yaml: + cat_plot: + - file: 
symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + 
xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: kdd_nsl/plots/metric_vs_accuracy.pdf + hash: md5 + md5: 2abfc1441c3515f07d2e28459e730a4f + size: 24689 + - path: kdd_nsl/plots/metric_vs_predict_time.pdf + hash: md5 + md5: d91c94bf17617b79b2a417710efb9dfc + size: 23239 + - path: kdd_nsl/plots/metric_vs_train_time.pdf + hash: md5 + md5: d2c40b3e36886868c650917d02015be4 + size: 24227 + - path: kdd_nsl/plots/models_vs_accuracy.pdf + hash: md5 + md5: c6807ba0356e42159d683a2b3ab610a9 + size: 23546 + - path: kdd_nsl/plots/models_vs_predict_time.pdf + hash: md5 + md5: 2f6d79e1a5164884b87ef3f40bdafeeb + size: 19370 + - path: kdd_nsl/plots/models_vs_train_time.pdf + hash: md5 + md5: 
30ed28915c3ff6de16fffbf8c6bdda45 + size: 18949 + - path: kdd_nsl/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 1d0bb7d03823bb54b5b12b50dbc6615c + size: 22232 + - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 802d5119895198601ba2ee24b3cc9528 + size: 21618 + plot@truthseeker: + cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/merged.csv -c + conf/plots.yaml + deps: + - path: truthseeker/plots/merged.csv + hash: md5 + md5: a9b4f71f4d7eccde5a901730969b0bb1 + size: 1711555 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training 
Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: truthseeker/plots/metric_vs_accuracy.pdf + hash: md5 
+ md5: 935a8c7365ac4b738a1ab222357db671 + size: 23824 + - path: truthseeker/plots/metric_vs_predict_time.pdf + hash: md5 + md5: d5095d1375ed12b1a9b9f8ce5bfee839 + size: 22984 + - path: truthseeker/plots/metric_vs_train_time.pdf + hash: md5 + md5: c6dec8707d3da6a57eb64874b8489aa1 + size: 23404 + - path: truthseeker/plots/models_vs_accuracy.pdf + hash: md5 + md5: c09acc549b30af58463a3a8af31b80d1 + size: 20437 + - path: truthseeker/plots/models_vs_predict_time.pdf + hash: md5 + md5: ff7ffac5905b059ec6670c9220caf124 + size: 18153 + - path: truthseeker/plots/models_vs_train_time.pdf + hash: md5 + md5: f48cdb573700e225810e4ed960768e57 + size: 17725 + - path: truthseeker/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 4b92b154563b9c13bb5f177d0e106002 + size: 22192 + - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 2013309b971cea5728652df1a18ece16 + size: 21586 + plot@sms_spam: + cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/merged.csv -c + conf/plots.yaml + deps: + - path: sms_spam/plots/merged.csv + hash: md5 + md5: 3e3e63943b3d62dddc79e554cb691405 + size: 1492939 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + 
legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 
0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: sms_spam/plots/metric_vs_accuracy.pdf + hash: md5 + md5: 695e96d374959cef893859230a15f1a7 + size: 24667 + - path: sms_spam/plots/metric_vs_predict_time.pdf + hash: md5 + md5: 857505ffce8416303759a76cb29b26a3 + size: 23552 + - path: sms_spam/plots/metric_vs_train_time.pdf + hash: md5 + md5: 98b34d861b84d36cb30f58c763445eb7 + size: 23637 + - path: sms_spam/plots/models_vs_accuracy.pdf + hash: md5 + md5: 3d9cda5e091398ec195ff1c763fb0b5a + size: 23033 + - path: sms_spam/plots/models_vs_predict_time.pdf + hash: md5 + md5: 06ae4883133a4f2bb4c19f531c693fdd + size: 19365 + - path: sms_spam/plots/models_vs_train_time.pdf + hash: md5 + md5: f8af33a8abf0caf4fc83a69b6af565a0 + size: 18945 + - path: sms_spam/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 43b4f4865931fca59079491745c20f1c + size: 22231 + - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 4f5b0a9ac3efe2e0daa225f79fe0e40c + size: 21606 + plot@ddos: + cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/merged.csv -c + conf/plots.yaml + deps: + - path: ddos/plots/merged.csv + hash: md5 + md5: 2fd123789b3c749a653aa9c142d23858 + size: 5465498 + params: + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_metric.pdf + x: model.init.symmetric + y: accuracy + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Accuracy + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - 
Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + - file: symmetric_vs_metric_train_time.pdf + x: model.init.symmetric + y: train_time + hue: model.init.metric + errorbar: se + kind: bar + titles: + xlabels: '' + ylabels: Training Time (s) + legend_title: Metrics + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_accuracy.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: Model + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: linear + ylim: + - 0 + - 1 + rotation: 90 + - file: models_vs_train_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Training Time (s) + legend_title: Samples + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + - file: models_vs_predict_time.pdf + x: model_name + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: bar + titles: + xlabels: Model + ylabels: Prediction Time (s) + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + set: + yscale: log + rotation: 90 + line_plot: + - file: metric_vs_accuracy.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + 
xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_predict_time.pdf + hue: model.init.metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Gzip + - Pickle + - BZ2 + - Zstd + - Lzma + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: ddos/plots/metric_vs_accuracy.pdf + hash: md5 + md5: 3b2f9c2885d331a0cadd339177318f3f + size: 24827 + - path: ddos/plots/metric_vs_predict_time.pdf + hash: md5 + md5: 56c78e45d5932c61b339753810a6fed1 + size: 24347 + - path: ddos/plots/metric_vs_train_time.pdf + hash: md5 + md5: 7ba195f1f39c450c7ebd9165eee97f32 + size: 22962 + - path: ddos/plots/models_vs_accuracy.pdf + hash: md5 + md5: 4e5e04199aa08c3098632cf8fad2c744 + size: 23780 + - path: ddos/plots/models_vs_predict_time.pdf + hash: md5 + md5: 41c0c84e0b3b737273692f10c366b275 + size: 19529 + - path: ddos/plots/models_vs_train_time.pdf + hash: md5 + md5: 38dd71a6ac8cd50294d5b81bffd8425b + size: 19106 + - path: ddos/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 72331f97089e5465a2df8a071f6dcf10 + size: 22223 + - path: ddos/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 
3014b61ef7c5fe2e5276149ecd20625b + size: 22143 + condense@truthseeker-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.test_size=100 model_name=gzip_logistic model=best_gzip_logistic_truthseeker + hydra.sweeper.study_name=condense_gzip_logistic_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_logistic/ + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/study.csv + model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=truthseeker files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_logistic_truthseeker.yaml + hash: md5 + md5: 448e12c542f48c074057e9374743d61e + size: 326 + - path: truthseeker/logs/method/ + hash: md5 + md5: 6f6693db2bb9520dc7956f0d0c003e23.dir + size: 116543 + nfiles: 44 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? 
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/gzip_logistic/ + hash: md5 + md5: 79d74a0dfe0486ada3f03b24c68973dc.dir + size: 1576129 + nfiles: 513 + - path: truthseeker/reports/condense/gzip_logistic/ + hash: md5 + md5: 3de3011b1d96e4990111f5b1601e3b9d.dir + size: 400559 + nfiles: 343 + condense@ddos-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100 + model_name=gzip_knn model=best_gzip_knn_ddos hydra.sweeper.study_name=condense_gzip_knn_ddos + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_knn/ + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=ddos files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_knn_ddos.yaml + hash: md5 + md5: 74721f3e7ab6096e246c486d6080e1ab + size: 259 + - path: ddos/logs/method/ + hash: md5 + md5: 7128c67930147170f54fb89880528199.dir + size: 120518 + nfiles: 48 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/gzip_knn/ + hash: md5 + md5: a2dc5aef876897f53c4076e4012b678a.dir + size: 1542474 + nfiles: 513 + - path: ddos/reports/condense/gzip_knn/ + hash: md5 + md5: 781709e87f2e740f6a0f4e914ee9754f.dir + size: 340848 + nfiles: 379 + condense@ddos-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100 + model_name=gzip_logistic model=best_gzip_logistic_ddos hydra.sweeper.study_name=condense_gzip_logistic_ddos + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_logistic/ + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=ddos files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16 + --config-name condense 
--multirun + deps: + - path: conf/model/best_gzip_logistic_ddos.yaml + hash: md5 + md5: 9507b28fa5a18b501fe9d80ec33bed1c + size: 334 + - path: ddos/logs/method/ + hash: md5 + md5: 7128c67930147170f54fb89880528199.dir + size: 120518 + nfiles: 48 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/gzip_logistic/ + hash: md5 + md5: 4f8f846516837f0e7cd63c8911aff99a.dir + size: 1623568 + nfiles: 513 + - path: ddos/reports/condense/gzip_logistic/ + hash: md5 + md5: 051b71717b4a7986a1965ebadf448838.dir + size: 350870 + nfiles: 384 + condense@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_kdd_nsl hydra.sweeper.study_name=condense_gzip_knn_kdd_nsl + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 
hydra.sweep.dir=kdd_nsl/logs/condense/gzip_knn/ + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/study.csv model.init.m='tag(log, + interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn + files.directory=kdd_nsl files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 + --config-name condense --multirun + deps: + - path: conf/model/best_gzip_knn_kdd_nsl.yaml + hash: md5 + md5: 2697918626643d0136286367b83ee6b9 + size: 258 + - path: kdd_nsl/logs/method/ + hash: md5 + md5: de8764bbb2daa13261f3f5d1dff27a30.dir + size: 79348 + nfiles: 28 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: ??? 
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 2 + direction: ${direction} + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: kdd_nsl/logs/condense/gzip_knn/ + hash: md5 + md5: 7d53f3534ceb486e6601d344562cfb32.dir + size: 1564530 + nfiles: 513 + - path: kdd_nsl/reports/condense/gzip_knn/ + hash: md5 + md5: 7e5a283215281be3ee4189ebd5a6e3f1.dir + size: 342924 + nfiles: 384 + parse_params: + cmd: python -m deckard.layers.parse + deps: + - path: conf/data/default.yaml + hash: md5 + md5: 86639d6672cfd9529dda3e2ae4036c01 + size: 22 + - path: conf/default.yaml + hash: md5 + md5: a0a533f84a7ffce197e0db5439219faf + size: 1504 + - path: conf/files/default.yaml + hash: md5 + md5: 7a2df5f8b98699376c3fb4da05d70dea + size: 306 + - path: conf/model/default.yaml + hash: md5 + md5: 39dc7512b1d19fea54550b080d880153 + size: 27 + - path: conf/scorers/default.yaml + hash: md5 + md5: d8d00e7d284ea68b1244743dfef8f00c + size: 280 + outs: + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + test_each_metric@gzip-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20 + ++raise_exception=True ' + deps: + - path: 
kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20 + hash: md5 + md5: 6091388fcd68296e6ccd16f0955cba96.dir + size: 7683 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/gzip/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@zstd-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/zstd/20 + files.directory=kdd_nsl 
data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=zstd model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20 + hash: md5 + md5: 704acd4e060b20b19dd8c6528ee42b02.dir + size: 7683 + nfiles: 4 + - path: 
kdd_nsl/reports/test_each_metric/gzip_knn/zstd/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@pkl-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/pkl/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=pkl model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: 
deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20 + hash: md5 + md5: 539ec713f43133226c23d088f60a66bf.dir + size: 7668 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/pkl/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@bz2-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/bz2/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=bz2 model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + 
library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20 + hash: md5 + md5: dc85f72896e274b978488f36ec121474.dir + size: 7668 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/bz2/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@lzma-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/lzma/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=lzma model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + 
test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20 + hash: md5 + md5: 3e929ed47c2f62267a513fcc9ac7faec.dir + size: 7683 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/lzma/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@levenshtein-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/levenshtein/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=levenshtein model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + 
score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20 + hash: md5 + md5: 6e719f5801c71fe88793e4a42fe47b68.dir + size: 7767 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/levenshtein/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@ratio-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/ratio/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=ratio model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + 
dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20 + hash: md5 + md5: c7917445640a277d2a898413a74442e3.dir + size: 7677 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/ratio/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@hamming-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/hamming/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=hamming model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + 
params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20 + hash: md5 + md5: 384b5ae13749ca9006486a64dd50faf0.dir + size: 7707 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/hamming/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_dataset@gzip_knn-kdd_nsl: + cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn + data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl + model_name=gzip_knn model=gzip_knn hydra.run.dir=kdd_nsl/logs/test_each_dataset/gzip_knn + ++raise_exception=True ' + deps: + - path: 
kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 41e95614d524a857c0260b13ce77202b + size: 488 + - path: params.yaml + hash: md5 + md5: 9a178db02b5ad8f990c7a557790a36c7 + size: 1381 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_dataset/gzip_knn + hash: md5 + md5: 955370e62c64341f4410f3f46f6d84fd.dir + size: 7263 + nfiles: 4 + - path: kdd_nsl/reports/test_each_dataset/gzip_knn/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_dataset@gzip_knn-truthseeker: + cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn + data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker + 
model_name=gzip_knn model=gzip_knn hydra.run.dir=truthseeker/logs/test_each_dataset/gzip_knn + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 41e95614d524a857c0260b13ce77202b + size: 488 + - path: params.yaml + hash: md5 + md5: 9a178db02b5ad8f990c7a557790a36c7 + size: 1381 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/test_each_dataset/gzip_knn + hash: md5 + md5: f8dd2e14f7e12daed6ebfd9a552d6c4e.dir + size: 7305 + nfiles: 4 + - path: truthseeker/reports/test_each_dataset/gzip_knn/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_dataset@ddos-gzip_knn: + cmd: 'python -m deckard.layers.optimise 
stage=train files.name=gzip_knn data.sample.train_size=100 + files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model=gzip_knn + hydra.run.dir=ddos/logs/train/gzip_knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/train/gzip_knn + hash: md5 + md5: 86973d6369f6a61b442f6387478ccde6.dir + size: 8041 + nfiles: 4 + - path: ddos/reports/train/gzip_knn/score_dict.json + hash: md5 + md5: 
1269132e68fc8dff521df51cb2fe321c + size: 284 + test_each_dataset@ddos-gzip_svc: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 + files.directory=ddos data=ddos dataset=ddos model_name=gzip_svc model=gzip_svc + hydra.run.dir=ddos/logs/train/gzip_svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: ddos/logs/train/gzip_svc + hash: md5 + md5: 67d472318cba51a8f9e7989991cbf09e.dir + 
size: 8038 + nfiles: 4 + - path: ddos/reports/train/gzip_svc/score_dict.json + hash: md5 + md5: 5728b15f67d338a4bf8160b60715dce8 + size: 283 + test_each_dataset@ddos-gzip_logistic: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic + data.sample.train_size=100 files.directory=ddos data=ddos dataset=ddos model_name=gzip_logistic + model=gzip_logistic hydra.run.dir=ddos/logs/train/gzip_logistic ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: 
sklearn.metrics.log_loss + outs: + - path: ddos/logs/train/gzip_logistic + hash: md5 + md5: 24fe0f4f52e6989c5a1c65795ea0d936.dir + size: 8173 + nfiles: 4 + - path: ddos/reports/train/gzip_logistic/score_dict.json + hash: md5 + md5: 259b4ae57c0c1e8d08b72f7f888fbe45 + size: 281 + test_each_dataset@truthseeker-gzip_knn: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn + model=gzip_knn hydra.run.dir=truthseeker/logs/train/gzip_knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + 
direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/train/gzip_knn + hash: md5 + md5: ba3eb31317c073b3b07a9c9d1948e656.dir + size: 8158 + nfiles: 4 + - path: truthseeker/reports/train/gzip_knn/score_dict.json + hash: md5 + md5: 2088612d107192d0497e9fd2c569818f + size: 283 + test_each_dataset@truthseeker-gzip_svc: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 + files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_svc + model=gzip_svc hydra.run.dir=truthseeker/logs/train/gzip_svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: 
sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/train/gzip_svc + hash: md5 + md5: 4512bda479ab6cd5ae74e7f575928b9d.dir + size: 8154 + nfiles: 4 + - path: truthseeker/reports/train/gzip_svc/score_dict.json + hash: md5 + md5: 25d8ec2a07497188e4311c5d62f9ddb6 + size: 281 + test_each_dataset@truthseeker-gzip_logistic: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic + data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker + model_name=gzip_logistic model=gzip_logistic hydra.run.dir=truthseeker/logs/train/gzip_logistic + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: 
deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: truthseeker/logs/train/gzip_logistic + hash: md5 + md5: e1da0260d3c55bfbf4a44bb1b96206ba.dir + size: 8315 + nfiles: 4 + - path: truthseeker/reports/train/gzip_logistic/score_dict.json + hash: md5 + md5: 9ba0565e8f7dcb14a1e45b8e585d9ccb + size: 283 + test_each_dataset@sms_spam-gzip_knn: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn + model=gzip_knn hydra.run.dir=sms_spam/logs/train/gzip_knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: 
deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/logs/train/gzip_knn + hash: md5 + md5: 2066e09b41a2f6ce0c835018278b0dc6.dir + size: 8093 + nfiles: 4 + - path: sms_spam/reports/train/gzip_knn/score_dict.json + hash: md5 + md5: 45ab656d14366622402a687082c5feeb + size: 284 + test_each_dataset@sms_spam-gzip_svc: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 + files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_svc + model=gzip_svc hydra.run.dir=sms_spam/logs/train/gzip_svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: 
deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/logs/train/gzip_svc + hash: md5 + md5: 4f8d2f14bf8ed23f7443b91640fbb2c0.dir + size: 8090 + nfiles: 4 + - path: sms_spam/reports/train/gzip_svc/score_dict.json + hash: md5 + md5: 6cf7317e720631b93bcd699b22a9c4ec + size: 283 + test_each_dataset@sms_spam-gzip_logistic: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic + data.sample.train_size=100 files.directory=sms_spam data=sms_spam dataset=sms_spam + model_name=gzip_logistic model=gzip_logistic hydra.run.dir=sms_spam/logs/train/gzip_logistic + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: 
default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: sms_spam/logs/train/gzip_logistic + hash: md5 + md5: e9577cb3ce87a9e0a55da46017111e2a.dir + size: 8225 + nfiles: 4 + - path: sms_spam/reports/train/gzip_logistic/score_dict.json + hash: md5 + md5: 8c39b120c89ed2d1c51c88d99f202ab1 + size: 281 + test_each_dataset@kdd_nsl-gzip_knn: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model=gzip_knn + hydra.run.dir=kdd_nsl/logs/train/gzip_knn ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: 
+ _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/train/gzip_knn + hash: md5 + md5: d9f95ac89efb51e0b9474a50ed1ee34d.dir + size: 8108 + nfiles: 4 + - path: kdd_nsl/reports/train/gzip_knn/score_dict.json + hash: md5 + md5: 1bb23417615a5663b20ae3c9bb05ab41 + size: 284 + test_each_dataset@kdd_nsl-gzip_svc: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 + files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_svc model=gzip_svc + hydra.run.dir=kdd_nsl/logs/train/gzip_svc ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 
+ stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/train/gzip_svc + hash: md5 + md5: 8efe1af9a07fe35bf35a620aecc9984e.dir + size: 8105 + nfiles: 4 + - path: kdd_nsl/reports/train/gzip_svc/score_dict.json + hash: md5 + md5: 6e851ecef3c53745a566ce54bc9b64e3 + size: 283 + test_each_dataset@kdd_nsl-gzip_logistic: + cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic + data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl + model_name=gzip_logistic model=gzip_logistic hydra.run.dir=kdd_nsl/logs/train/gzip_logistic + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: 
deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/train/gzip_logistic + hash: md5 + md5: b3b1f1813a6bc3b51b1aca53b3730892.dir + size: 8240 + nfiles: 4 + - path: kdd_nsl/reports/train/gzip_logistic/score_dict.json + hash: md5 + md5: ce2f45436d570475e2cd62b1d5417305 + size: 281 + test_each_metric@jaro-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=jaro model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20 + ++raise_exception=True ' + deps: + - path: 
kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20 + hash: md5 + md5: 8b71ff09c44e615322095f861b3f1dca.dir + size: 7662 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/jaro/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@jaro_winkler-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro_winkler/20 + 
files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=jaro_winkler model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20 + hash: md5 + md5: 2b831c44b315a8b61c3f762b365c8e5f.dir + size: 7782 + nfiles: 4 + - path: 
kdd_nsl/reports/test_each_metric/gzip_knn/jaro_winkler/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_metric@seqratio-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/seqratio/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=seqratio model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + 
log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20 + hash: md5 + md5: ed632f40ed8ff016cb649ab00c408114.dir + size: 7722 + nfiles: 4 + - path: kdd_nsl/reports/test_each_metric/gzip_knn/seqratio/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_knn-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_knn/gzip/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: 
gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_model/gzip_knn/gzip/20 + hash: md5 + md5: c8075fa1867cb00a11f6df654086bd97.dir + size: 7675 + nfiles: 4 + - path: kdd_nsl/reports/test_each_model/gzip_knn/gzip/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_svc-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 + ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + 
random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 + hash: md5 + md5: 6ec9663f42d781dc482f1da6df886312.dir + size: 7678 + nfiles: 4 + - path: kdd_nsl/reports/test_each_model/gzip_svc/gzip/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_logistic-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 81a03f1290fe4d5eaa739ba9807b5b20 + size: 488 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json 
+ reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 + hash: md5 + md5: 8ba9f7659cef2c4d610fece176de1548.dir + size: 7767 + nfiles: 4 + - path: kdd_nsl/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json + hash: md5 + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + grid_search@20-kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: 
+ dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: kdd_nsl/logs/gzip_knn/20 + hash: md5 + md5: 5c03e3e52e7a24e15acbd0b2aadfee35.dir + size: 1389089 + nfiles: 514 + - path: kdd_nsl/reports/gzip_knn/20/train/ + hash: md5 + md5: a7e0e97547bfac97d8518259bffdd4c1.dir + size: 1847622 + nfiles: 1661 + grid_search@20-kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20 + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: 
processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: kdd_nsl/logs/gzip_logistic/20 + hash: md5 + md5: e7528ce71bad9f745a9f5e4fcf3a2df1.dir + size: 1571121 + nfiles: 514 + - path: kdd_nsl/reports/gzip_logistic/20/train/ + hash: md5 + md5: 127796b95b1817c4b0d9f1846537b0a6.dir + size: 2083086 + nfiles: 1772 + grid_search@20-kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: kdd_nsl/logs/gzip_svc/20 + hash: md5 + md5: a1cb35a26808d09dac04aef8fc7106cb.dir + size: 1524012 + nfiles: 514 + - path: kdd_nsl/reports/gzip_svc/20/train/ + hash: md5 + md5: f475c4428240afaaf863bb021eb82890.dir + size: 2095726 + nfiles: 2092 + grid_search@20-truthseeker-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: 
auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: truthseeker/logs/gzip_knn/20 + hash: md5 + md5: 21da241789a9856418302895c146cd4d.dir + size: 1370161 + nfiles: 514 + - path: truthseeker/reports/gzip_knn/20/train/ + hash: md5 + md5: 394a7d8c033166c958996d646f822460.dir + size: 376291 + nfiles: 340 + grid_search@20-truthseeker-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20 + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: truthseeker/logs/gzip_logistic/20 + hash: md5 + md5: 4eceda9fdfa787e48b4a2d397ad89332.dir + size: 1497002 + nfiles: 514 + - path: truthseeker/reports/gzip_logistic/20/train/ + hash: md5 + md5: 9b32f4ef152eda3a3f2e68d424d163d2.dir + size: 555897 + nfiles: 366 + grid_search@20-truthseeker-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: truthseeker/logs/gzip_svc/20 + hash: md5 + md5: 20a01b45b6f1901a8e929bf1cbccd349.dir + size: 1473672 + nfiles: 514 + - path: truthseeker/reports/gzip_svc/20/train/ + hash: md5 + md5: a2b059debfa307134c83ec03713e8a50.dir + size: 546743 + nfiles: 384 + grid_search@20-sms_spam-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: sms_spam/logs/gzip_knn/20 + hash: md5 + md5: bcee56ea959096e8255fb482a8854457.dir + size: 1381168 + nfiles: 514 + - path: sms_spam/reports/gzip_knn/20/train/ + hash: md5 + md5: 12133daeda911e75210cff4d8a3fa5a7.dir + size: 379524 + nfiles: 326 + grid_search@20-sms_spam-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20 + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + 
prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: sms_spam/logs/gzip_logistic/20 + hash: md5 + md5: 5c7265a3ac4bf4774fbb1c440b9910c4.dir + size: 1520121 + nfiles: 514 + - path: sms_spam/reports/gzip_logistic/20/train/ + hash: md5 + md5: 9ae8109f623b19dcbabe51e4401a1f8c.dir + size: 552539 + nfiles: 357 + grid_search@20-sms_spam-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: sms_spam/logs/gzip_svc/20 + hash: md5 + md5: fe6324545be6dc97b88326e10a65e815.dir + size: 1451676 + nfiles: 514 + - path: sms_spam/reports/gzip_svc/20/train/ + hash: md5 + md5: 814632194dc03d626a24f0418fd703e1.dir + size: 542357 + nfiles: 384 + grid_search@20-ddos-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_ddos 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv + files.directory=ddos files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r 
+ model_name: gzip_knn + outs: + - path: ddos/logs/gzip_knn/20 + hash: md5 + md5: 057fc9613b2210a0dd1e03ef46f3d6bc.dir + size: 1616211 + nfiles: 514 + - path: ddos/reports/gzip_knn/20/train/ + hash: md5 + md5: b0ae22713c6a319a24acb69525a9f01a.dir + size: 1375974 + nfiles: 1536 + grid_search@20-ddos-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_logistic/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: ddos/logs/gzip_logistic/20 + hash: md5 + md5: f2c036dc149976bc0de5187f8661669d.dir + size: 1705246 + nfiles: 514 + - path: ddos/reports/gzip_logistic/20/train/ + hash: md5 + md5: 36eee9b3fb432eafed577ca45b477dab.dir + size: 1608552 + nfiles: 1349 + grid_search@20-ddos-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 
model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv + files.directory=ddos files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + 
verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: ddos/logs/gzip_svc/20 + hash: md5 + md5: 5934a7b63c96844a0eaa9ecea06a79c2.dir + size: 1639820 + nfiles: 514 + - path: ddos/reports/gzip_svc/20/train/ + hash: md5 + md5: 0e902831c38cc7b2f2b03d7bb7f4f5cf.dir + size: 1580188 + nfiles: 1536 + grid_search@100-kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: kdd_nsl/logs/gzip_knn/100 + hash: md5 + md5: aa2209bce9b2f829ca22f244b53ed58f.dir + size: 1416182 + nfiles: 514 + - path: kdd_nsl/reports/gzip_knn/100/train/ + hash: md5 + md5: 1547fa66fbaac37a7badef9b300577a7.dir + size: 1163933 + nfiles: 1000 + grid_search@100-kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100 + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + 
prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: kdd_nsl/logs/gzip_logistic/100 + hash: md5 + md5: b6e7cf1d3984f8029177576f9668944b.dir + size: 1609157 + nfiles: 514 + - path: kdd_nsl/reports/gzip_logistic/100/train/ + hash: md5 + md5: d40db4814c403a903c7d0cd2a8a5bb7b.dir + size: 1329546 + nfiles: 1093 + grid_search@100-kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: kdd_nsl/logs/gzip_svc/100 + hash: md5 + md5: 4b96e2a3bb0e0d230ebd96591a16e441.dir + size: 1553624 + nfiles: 514 + - path: kdd_nsl/reports/gzip_svc/100/train/ + hash: md5 + md5: 3cf8a86de1026ead8fcd1b6cda47e910.dir + size: 1247698 + nfiles: 1152 + grid_search@100-truthseeker-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + 
batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: truthseeker/logs/gzip_knn/100 + hash: md5 + md5: 818cba0a8349442987e5d6be1f0672d4.dir + size: 1374869 + nfiles: 514 + - path: truthseeker/reports/gzip_knn/100/train/ + hash: md5 + md5: 261a37d5d497bd477d872aa72a94a13f.dir + size: 394446 + nfiles: 320 + grid_search@100-truthseeker-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100 + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: truthseeker/logs/gzip_logistic/100 + hash: md5 + md5: dd822b92438871be421644a82afa8e2f.dir + size: 1528739 + nfiles: 514 + - path: truthseeker/reports/gzip_logistic/100/train/ + hash: md5 + md5: d1b22149466a949b86aba9390d7cf992.dir + size: 556386 + nfiles: 365 + grid_search@100-truthseeker-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: truthseeker/logs/gzip_svc/100 + hash: md5 + md5: c9493ae71545ccec0ea01adc6d664bce.dir + size: 1505603 + nfiles: 514 + - path: truthseeker/reports/gzip_svc/100/train/ + hash: md5 + md5: c9a4bae4aed04fcdb578f44fba94af87.dir + size: 547282 + nfiles: 384 + grid_search@100-sms_spam-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: sms_spam/logs/gzip_knn/100 + hash: md5 + md5: ad8714bbbce96d2c1ff75deda0add5ec.dir + size: 1415136 + nfiles: 514 + - path: sms_spam/reports/gzip_knn/100/train/ + hash: md5 + md5: 6bcf048da228e84a757916c797891044.dir + size: 376546 + nfiles: 331 + find_best_model@ddos-gzip_knn: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_knn_ddos --config_subdir model --params_file best_gzip_knn_ddos --default_config + gzip_knn + deps: + - path: ddos/logs/gzip_knn/ + hash: md5 + md5: 
d2c6441e85e3509b8968240a48196d07.dir + size: 4193267 + nfiles: 1542 + outs: + - path: conf/model/best_gzip_knn_ddos.yaml + hash: md5 + md5: bdea475d3a2bc59106f27dccd0fc27fc + size: 419 + find_best_model@ddos-gzip_svc: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_svc_ddos --config_subdir model --params_file best_gzip_svc_ddos --default_config + gzip_svc + deps: + - path: ddos/logs/gzip_svc/ + hash: md5 + md5: 78cd23f301a93a7c9842abb061e3cc7b.dir + size: 7447727 + nfiles: 2570 + outs: + - path: conf/model/best_gzip_svc_ddos.yaml + hash: md5 + md5: 3a7f27dd470ec9e55c10403814f550f2 + size: 442 + find_best_model@ddos-gzip_logistic: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_logistic_ddos --config_subdir model --params_file best_gzip_logistic_ddos + --default_config gzip_logistic + deps: + - path: ddos/logs/gzip_logistic/ + hash: md5 + md5: b28cadbd10b9bbe40802e39b1beaee18.dir + size: 6561328 + nfiles: 2056 + outs: + - path: conf/model/best_gzip_logistic_ddos.yaml + hash: md5 + md5: d5e603d6386dd6cf1167088eaecbdde5 + size: 498 + condense@ddos-knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=condensed_knn model=gzip_knn files.directory=ddos + files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_ddos + hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/knn/ + hydra.callbacks.study_dump.output_file=ddos/logs/knn/study.csv hydra.launcher.n_jobs=-1 + --config-name condense_knn --multirun + deps: + - path: conf/condense_knn.yaml + hash: md5 + md5: abd25d17a742e467d39dda34b448ba88 + size: 2181 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/knn/ + hash: md5 + md5: 34f8b7196af71d106965513050a254fb.dir + size: 10910937 + nfiles: 4097 + - path: ddos/reports/condense/knn/ + hash: md5 + md5: 9b6918814be3bea732abc71b8684fd8d.dir + size: 8458502 + nfiles: 9157 + condense@ddos-svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=condensed_svc model=gzip_svc files.directory=ddos + files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_ddos + hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/svc/ + hydra.callbacks.study_dump.output_file=ddos/logs/svc/study.csv hydra.launcher.n_jobs=-1 + 
--config-name condense_svc --multirun + deps: + - path: conf/model/best_gzip_svc_ddos.yaml + hash: md5 + md5: 3a7f27dd470ec9e55c10403814f550f2 + size: 442 + - path: ddos/logs/method/ + hash: md5 + md5: a09dd0467b0e8a142d6f32a38f205159.dir + size: 59399 + nfiles: 28 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/svc/ + hash: md5 + md5: 6a15cfc205c7382b8d7d6d67d35ddfb0.dir + size: 11072739 + nfiles: 4097 + - path: ddos/reports/condense/svc/ + hash: md5 + md5: daaf428c939e9bfcc233bf88ee39f9fb.dir + size: 2819182 + nfiles: 3072 + condense@ddos-logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos 
data.sample.train_size=100 + data.sample.test_size=100 model_name=condensed_logistic model=gzip_logistic + files.directory=ddos files.reports=reports/condense/logistic/ hydra.sweeper.study_name=condense_logistic_ddos + hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/logistic/ + hydra.callbacks.study_dump.output_file=ddos/logs/logistic/study.csv hydra.launcher.n_jobs=-1 + --config-name condense_logistic --multirun + deps: + - path: conf/model/best_gzip_logistic_ddos.yaml + hash: md5 + md5: d5e603d6386dd6cf1167088eaecbdde5 + size: 498 + - path: ddos/logs/method/ + hash: md5 + md5: a09dd0467b0e8a142d6f32a38f205159.dir + size: 59399 + nfiles: 28 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + 
temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/condense/logistic/ + hash: md5 + md5: 064e5768d0155635c9bc6287914ac9f7.dir + size: 11690343 + nfiles: 4097 + - path: ddos/reports/condense/logistic/ + hash: md5 + md5: 7ce841278929a90690417685b7c7f143.dir + size: 5929815 + nfiles: 5888 + grid_search@100-ddos-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv + files.directory=ddos files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: ddos/logs/gzip_knn/100 + hash: md5 + md5: 41af522bae6f35684d51a90652c37082.dir + size: 1645388 + nfiles: 514 + - path: ddos/reports/gzip_knn/100/train/ + hash: md5 + md5: b9374a5acb2480c2ed6a35803a344f69.dir + size: 1341749 + nfiles: 1499 + grid_search@100-ddos-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 
hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_logistic/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + 
timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: ddos/logs/gzip_logistic/100 + hash: md5 + md5: 3f1d14c70e73f668316f86a8d7d0e22b.dir + size: 1733688 + nfiles: 514 + - path: ddos/reports/gzip_logistic/100/train/ + hash: md5 + md5: c839c1faf70de47c057714c3a8bdc52d.dir + size: 1562420 + nfiles: 1315 + grid_search@100-ddos-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv + files.directory=ddos files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: ddos/logs/gzip_svc/100 + hash: md5 + md5: 4adc8c896e06e2d7e8450f2b863b95bf.dir + size: 1681042 + nfiles: 514 + - path: ddos/reports/gzip_svc/100/train/ + hash: md5 + md5: 8ad9bbb8a118699458753528a263f5ba.dir + size: 1790102 + nfiles: 1678 + find_best_model@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_knn_kdd_nsl --config_subdir model --params_file best_gzip_knn_kdd_nsl --default_config + gzip_knn + deps: + - path: 
kdd_nsl/logs/gzip_knn/ + hash: md5 + md5: 6418750af32f15be9c6f35e0975b3276.dir + size: 4024441 + nfiles: 1542 + outs: + - path: conf/model/best_gzip_knn_kdd_nsl.yaml + hash: md5 + md5: f9ad25a19931041146b4b1eab45fda68 + size: 420 + find_best_model@kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_svc_kdd_nsl --config_subdir model --params_file best_gzip_svc_kdd_nsl --default_config + gzip_svc + deps: + - path: kdd_nsl/logs/gzip_svc/ + hash: md5 + md5: 381879c377b6eeccbb9d1aa42f78fec2.dir + size: 4366326 + nfiles: 1542 + outs: + - path: conf/model/best_gzip_svc_kdd_nsl.yaml + hash: md5 + md5: 0542c20ce7b5a74a20d4ab1c38fdf213 + size: 434 + find_best_model@kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_logistic_kdd_nsl --config_subdir model --params_file best_gzip_logistic_kdd_nsl + --default_config gzip_logistic + deps: + - path: kdd_nsl/logs/gzip_logistic/ + hash: md5 + md5: 34325e24d16a4af0ec3286ec4b034e14.dir + size: 4504884 + nfiles: 1542 + outs: + - path: conf/model/best_gzip_logistic_kdd_nsl.yaml + hash: md5 + md5: e21d828b4b1ad122d7755e986de5b93d + size: 353 + find_best_model@sms_spam-gzip_knn: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_knn_sms_spam --config_subdir model --params_file best_gzip_knn_sms_spam + --default_config gzip_knn + deps: + - path: sms_spam/logs/gzip_knn/ + hash: md5 + md5: 689c69db8c621101649ddef5bd0c1bb5.dir + size: 2713750 + nfiles: 1028 + outs: + - path: conf/model/best_gzip_knn_sms_spam.yaml + hash: md5 + md5: 41fad710bcb8b8b8dd548d669b2ed748 + size: 419 + find_best_model@sms_spam-gzip_svc: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_svc_sms_spam --config_subdir model --params_file best_gzip_svc_sms_spam + --default_config gzip_svc + deps: + - path: sms_spam/logs/gzip_svc/ + hash: md5 + md5: 
b91e15f0eb5ee57aed8aeb5a5d6feeab.dir + size: 2777710 + nfiles: 1028 + outs: + - path: conf/model/best_gzip_svc_sms_spam.yaml + hash: md5 + md5: bb3008613c3311a696d32fb683732c00 + size: 442 + find_best_model@sms_spam-gzip_logistic: + cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name + gzip_logistic_sms_spam --config_subdir model --params_file best_gzip_logistic_sms_spam + --default_config gzip_logistic + deps: + - path: sms_spam/logs/gzip_logistic/ + hash: md5 + md5: 89191dbe147b40192129776ef2652900.dir + size: 1649284 + nfiles: 578 + outs: + - path: conf/model/best_gzip_logistic_sms_spam.yaml + hash: md5 + md5: fd1d0481be57844d935aea28e995a369 + size: 485 + condense@kdd_nsl-knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn + model=gzip_knn files.directory=kdd_nsl files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_kdd_nsl + hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/knn/ + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/knn/study.csv hydra.launcher.n_jobs=-1 + --config-name condense_knn --multirun + deps: + - path: conf/model/best_gzip_knn_kdd_nsl.yaml + hash: md5 + md5: f9ad25a19931041146b4b1eab45fda68 + size: 420 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: kdd_nsl/logs/condense/knn/ + hash: md5 + md5: 81f50250e51650881283dcf68d43234c.dir + size: 10952920 + nfiles: 4097 + - path: kdd_nsl/reports/condense/knn/ + hash: md5 + md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir + size: 2869636 + nfiles: 3011 + condense@kdd_nsl-svc: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc + model=gzip_svc files.directory=kdd_nsl files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_kdd_nsl + hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/svc/ + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/svc/study.csv 
hydra.launcher.n_jobs=-1 + --config-name condense_svc --multirun + deps: + - path: conf/model/best_gzip_svc_kdd_nsl.yaml + hash: md5 + md5: 0542c20ce7b5a74a20d4ab1c38fdf213 + size: 434 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: kdd_nsl/logs/condense/svc/ + hash: md5 + md5: cdf319e0c94e4c6eda84ec9b2e9ea1a9.dir + size: 10708020 + nfiles: 4097 + - path: kdd_nsl/reports/condense/svc/ + hash: md5 + md5: ad27897c6454024915fdcef827219bd3.dir + size: 8340639 + nfiles: 5462 + condense@kdd_nsl-logistic: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 
model_name=condensed_logistic + model=gzip_logistic files.directory=kdd_nsl files.reports=reports/condense/logistic/ + hydra.sweeper.study_name=condense_logistic_kdd_nsl hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/logistic/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + deps: + - path: conf/model/best_gzip_logistic_kdd_nsl.yaml + hash: md5 + md5: e21d828b4b1ad122d7755e986de5b93d + size: 353 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: kdd_nsl/logs/condense/logistic/ + hash: md5 + md5: 
0ce56c12dc58fe66c1fa6fec867b2cf5.dir + size: 11710344 + nfiles: 4097 + - path: kdd_nsl/reports/condense/logistic/ + hash: md5 + md5: ae358823518ca6759ddfa8d1c738e367.dir + size: 3101125 + nfiles: 2948 + condense@truthseeker-knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn + model=gzip_knn files.directory=truthseeker files.reports=reports/condense/knn/ + hydra.sweeper.study_name=condense_knn_truthseeker hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/knn/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun + deps: + - path: conf/model/best_gzip_knn_truthseeker.yaml + hash: md5 + md5: 79baf4709c4a5f2535059ef8d1b6a082 + size: 258 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: 
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/knn/ + hash: md5 + md5: 3e8b9011ee1c591904115e67db9a1a50.dir + size: 11038890 + nfiles: 4097 + - path: truthseeker/reports/condense/knn/ + hash: md5 + md5: 1565eb2348976cc6ac9108396141080b.dir + size: 2831604 + nfiles: 3016 + condense@truthseeker-svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc + model=gzip_svc files.directory=truthseeker files.reports=reports/condense/svc/ + hydra.sweeper.study_name=condense_svc_truthseeker hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/svc/ hydra.callbacks.study_dump.output_file=truthseeker/logs/svc/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun + deps: + - path: conf/model/best_gzip_svc_truthseeker.yaml + hash: md5 + md5: 97d9d5857744b1cc077513ac5a659f62 + size: 302 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/svc/ + hash: md5 + md5: 845724e35dc3a54bea549410a35d6afd.dir + size: 11192018 + nfiles: 4097 + - path: truthseeker/reports/condense/svc/ + hash: md5 + md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir + size: 2825163 + nfiles: 3064 + condense@truthseeker-logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic + model=gzip_logistic files.directory=truthseeker files.reports=reports/condense/logistic/ + hydra.sweeper.study_name=condense_logistic_truthseeker hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/logistic/ 
hydra.callbacks.study_dump.output_file=truthseeker/logs/logistic/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + deps: + - path: conf/model/best_gzip_logistic_truthseeker.yaml + hash: md5 + md5: 448e12c542f48c074057e9374743d61e + size: 326 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: truthseeker/logs/condense/logistic/ + hash: md5 + md5: f7e754346e500d1b007b519d86f4c608.dir + size: 11847643 + nfiles: 4097 + - path: truthseeker/reports/condense/logistic/ + hash: md5 + md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir + size: 2976098 + nfiles: 3011 + condense@sms_spam-knn: + cmd: python -m deckard.layers.optimise stage=train 
data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn + model=gzip_knn files.directory=sms_spam files.reports=reports/condense/knn/ + hydra.sweeper.study_name=condense_knn_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/condense/knn/ hydra.callbacks.study_dump.output_file=sms_spam/logs/knn/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun + deps: + - path: conf/condense_knn.yaml + hash: md5 + md5: abd25d17a742e467d39dda34b448ba88 + size: 2181 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + 
max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/knn/ + hash: md5 + md5: ee1eda16b8989f2a23a7dfeba27b4437.dir + size: 10519093 + nfiles: 4097 + - path: sms_spam/reports/condense/knn/ + hash: md5 + md5: 84b8fcb1e78a8685141409736c6d6afa.dir + size: 4713599 + nfiles: 4258 + condense@sms_spam-svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc + model=gzip_svc files.directory=sms_spam files.reports=reports/condense/svc/ + hydra.sweeper.study_name=condense_svc_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/condense/svc/ hydra.callbacks.study_dump.output_file=sms_spam/logs/svc/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun + deps: + - path: conf/condense_svc.yaml + hash: md5 + md5: 7a311db45e697a23a2bed8180fd45e64 + size: 2182 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/svc/ + hash: md5 + md5: 9d28ee3f4494d207369bd35c2f5d2164.dir + size: 11082621 + nfiles: 4097 + - path: sms_spam/reports/condense/svc/ + hash: md5 + md5: 200cad31398ec4545e7a490011218c47.dir + size: 4416840 + nfiles: 3068 + condense@sms_spam-logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic + model=gzip_logistic files.directory=sms_spam files.reports=reports/condense/logistic/ + hydra.sweeper.study_name=condense_logistic_sms_spam hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/logistic/ 
hydra.callbacks.study_dump.output_file=sms_spam/logs/logistic/study.csv + hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + deps: + - path: conf/condense_logistic.yaml + hash: md5 + md5: 85b6d1d835afd7e95b5b9f804fbd7119 + size: 2326 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/condense.yaml: + hydra: + run: + dir: ${dataset}/logs/condense/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + direction: ${direction} + max_failure_rate: 1.0 + params: + ++data.sample.train_size: 1000 + ++data.sample.random_state: int(interval(10000, 20000)) + model.init.m: tag(log, interval(.01, .1)) + +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: sms_spam/logs/condense/logistic/ + hash: md5 + md5: 3846050e3a2341b246c2c3366debe0dc.dir + size: 11620551 + nfiles: 4097 + - path: sms_spam/reports/condense/logistic/ + hash: md5 + md5: 05562ae582796b70d35ae7062a5030d7.dir + size: 9597627 + nfiles: 6388 + 
compile@sms_spam-condense/logistic: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/logistic --results_file + sms_spam/reports/condense/logistic.csv + deps: + - path: sms_spam/reports/condense/logistic/ + hash: md5 + md5: 05562ae582796b70d35ae7062a5030d7.dir + size: 9597627 + nfiles: 6388 + outs: + - path: sms_spam/reports/condense/logistic.csv + hash: md5 + md5: 7094b26a582820cc1f88512573ce8c25 + size: 3430438 + compile@kdd_nsl-condense/svc: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/svc --results_file + kdd_nsl/reports/condense/svc.csv + deps: + - path: kdd_nsl/reports/condense/svc/ + hash: md5 + md5: ad27897c6454024915fdcef827219bd3.dir + size: 8340639 + nfiles: 5462 + outs: + - path: kdd_nsl/reports/condense/svc.csv + hash: md5 + md5: 643a67cb6d5974a787efa6339e3af058 + size: 3003804 + compile@kdd_nsl-condense/logistic: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/logistic --results_file + kdd_nsl/reports/condense/logistic.csv + deps: + - path: kdd_nsl/reports/condense/logistic/ + hash: md5 + md5: df73404e3f7d00371dd55b40e76fa9e0.dir + size: 3112185 + nfiles: 2954 + outs: + - path: kdd_nsl/reports/condense/logistic.csv + hash: md5 + md5: 4193461c63aca8b61956fc443f5bcd3d + size: 1649004 + compile@ddos-condense/svc: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/svc --results_file + ddos/reports/condense/svc.csv + deps: + - path: ddos/reports/condense/svc/ + hash: md5 + md5: b40b878f7eca11a9eae0c19e054bee47.dir + size: 8854939 + nfiles: 7199 + outs: + - path: ddos/reports/condense/svc.csv + hash: md5 + md5: 76b35c3e1dfa2d0476a737f9a41c25c4 + size: 3771755 + compile@truthseeker-condense/knn: + cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/knn --results_file + truthseeker/reports/condense/knn.csv + deps: + - path: truthseeker/reports/condense/knn/ + hash: md5 + md5: 
1565eb2348976cc6ac9108396141080b.dir + size: 2831604 + nfiles: 3016 + outs: + - path: truthseeker/reports/condense/knn.csv + hash: md5 + md5: b4ec50d98f613984be6261a059120255 + size: 1595839 + compile@truthseeker-condense/svc: + cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/svc --results_file + truthseeker/reports/condense/svc.csv + deps: + - path: truthseeker/reports/condense/svc/ + hash: md5 + md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir + size: 2825163 + nfiles: 3064 + outs: + - path: truthseeker/reports/condense/svc.csv + hash: md5 + md5: 4cdede4407c88bcda2afc8bbeae91ace + size: 1617655 + compile@ddos-condense/knn: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/knn --results_file + ddos/reports/condense/knn.csv + deps: + - path: ddos/reports/condense/knn/ + hash: md5 + md5: 9b6918814be3bea732abc71b8684fd8d.dir + size: 8458502 + nfiles: 9157 + outs: + - path: ddos/reports/condense/knn.csv + hash: md5 + md5: 0cd0ff58f94fb06093779ff81d37d2bf + size: 4723182 + compile@sms_spam-condense/svc: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/svc --results_file + sms_spam/reports/condense/svc.csv + deps: + - path: sms_spam/reports/condense/svc/ + hash: md5 + md5: 200cad31398ec4545e7a490011218c47.dir + size: 4416840 + nfiles: 3068 + outs: + - path: sms_spam/reports/condense/svc.csv + hash: md5 + md5: 32f06cbea623f845dcfa7400d707abad + size: 1573621 + compile@kdd_nsl-condense/knn: + cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/knn --results_file + kdd_nsl/reports/condense/knn.csv + deps: + - path: kdd_nsl/reports/condense/knn/ + hash: md5 + md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir + size: 2869636 + nfiles: 3011 + outs: + - path: kdd_nsl/reports/condense/knn.csv + hash: md5 + md5: 29211ec6d9b2b1a5e9193eaabfff3488 + size: 1608857 + compile@truthseeker-condense/logistic: + cmd: python -m deckard.layers.compile --report_folder 
truthseeker/reports/condense/logistic --results_file + truthseeker/reports/condense/logistic.csv + deps: + - path: truthseeker/reports/condense/logistic/ + hash: md5 + md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir + size: 2976098 + nfiles: 3011 + outs: + - path: truthseeker/reports/condense/logistic.csv + hash: md5 + md5: 5c01852f352ac96150fb36c2df9bcbbf + size: 1648856 + compile@sms_spam-condense/knn: + cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/knn --results_file + sms_spam/reports/condense/knn.csv + deps: + - path: sms_spam/reports/condense/knn/ + hash: md5 + md5: 84b8fcb1e78a8685141409736c6d6afa.dir + size: 4713599 + nfiles: 4258 + outs: + - path: sms_spam/reports/condense/knn.csv + hash: md5 + md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc + size: 2287605 + compile@ddos-condense/logistic: + cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/logistic --results_file + ddos/reports/condense/logistic.csv + deps: + - path: ddos/reports/condense/logistic/ + hash: md5 + md5: 7ce841278929a90690417685b7c7f143.dir + size: 5929815 + nfiles: 5888 + outs: + - path: ddos/reports/condense/logistic.csv + hash: md5 + md5: b24764aed957fdf6d2ccb541ef490d37 + size: 3150984 + clean@sms_spam-condense/svc: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/svc.csv + -o sms_spam/plots/clean/condense/svc.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/condense/svc.csv + hash: md5 + md5: 32f06cbea623f845dcfa7400d707abad + size: 1573621 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/condense/svc.csv + hash: md5 + md5: 
92b8648f6759e0a56c65aeec4a15aa92 + size: 1223675 + clean@ddos-condense/knn: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/knn.csv -o + ddos/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: ddos/reports/condense/knn.csv + hash: md5 + md5: 0cd0ff58f94fb06093779ff81d37d2bf + size: 4723182 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/condense/knn.csv + hash: md5 + md5: d214914ecfbba6afbd4ff9a61cb96bb1 + size: 3652514 + clean@truthseeker-condense/svc: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/svc.csv + -o truthseeker/plots/clean/condense/svc.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/condense/svc.csv + hash: md5 + md5: 4cdede4407c88bcda2afc8bbeae91ace + size: 1617655 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/condense/svc.csv + hash: md5 + md5: a17c0cdb6a3fbfae5bd4fcfca1938a96 + size: 1257671 + clean@kdd_nsl-condense/knn: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/knn.csv + -o kdd_nsl/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/condense/knn.csv + hash: md5 + md5: 29211ec6d9b2b1a5e9193eaabfff3488 + size: 1608857 + params: + conf/clean.yaml: + 
replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: kdd_nsl/plots/clean/condense/knn.csv + hash: md5 + md5: 23789b08b0fd1616555611d0e7971db9 + size: 1204868 + clean@kdd_nsl-condense/svc: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/svc.csv + -o kdd_nsl/plots/clean/condense/svc.csv -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/condense/svc.csv + hash: md5 + md5: 643a67cb6d5974a787efa6339e3af058 + size: 3003804 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: kdd_nsl/plots/clean/condense/svc.csv + hash: md5 + md5: c9b2ff8546f531fa439c664c63fc06fd + size: 2021393 + clean@kdd_nsl-condense/logistic: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/logistic.csv + -o kdd_nsl/plots/clean/condense/logistic.csv -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/condense/logistic.csv + hash: md5 + md5: 4193461c63aca8b61956fc443f5bcd3d + size: 1649004 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: 
Asymmetric + outs: + - path: kdd_nsl/plots/clean/condense/logistic.csv + hash: md5 + md5: 55a0ac50149a3e3d93b69c63ccd0d7a3 + size: 1174964 + clean@sms_spam-condense/knn: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/knn.csv + -o sms_spam/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/condense/knn.csv + hash: md5 + md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc + size: 2287605 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/condense/knn.csv + hash: md5 + md5: 7dda620e8ae59aab14ac83c0071a8b96 + size: 1268504 + clean@sms_spam-condense/logistic: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/logistic.csv + -o sms_spam/plots/clean/condense/logistic.csv -c conf/clean.yaml + deps: + - path: sms_spam/reports/condense/logistic.csv + hash: md5 + md5: 7094b26a582820cc1f88512573ce8c25 + size: 3430438 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: sms_spam/plots/clean/condense/logistic.csv + hash: md5 + md5: 1f89cfa87c87f195079e49eb5d6e7ce5 + size: 2461824 + clean@truthseeker-condense/logistic: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/logistic.csv + -o truthseeker/plots/clean/condense/logistic.csv -c conf/clean.yaml + deps: 
+ - path: truthseeker/reports/condense/logistic.csv + hash: md5 + md5: 5c01852f352ac96150fb36c2df9bcbbf + size: 1648856 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/condense/logistic.csv + hash: md5 + md5: 9710addb440069a5ea884d90ed4c394a + size: 1237939 + clean@truthseeker-condense/knn: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/knn.csv + -o truthseeker/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/condense/knn.csv + hash: md5 + md5: b4ec50d98f613984be6261a059120255 + size: 1595839 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: truthseeker/plots/clean/condense/knn.csv + hash: md5 + md5: a0c8deb8fe7617477ec43fae2a851b4d + size: 1191230 + clean@ddos-condense/svc: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/svc.csv -o + ddos/plots/clean/condense/svc.csv -c conf/clean.yaml + deps: + - path: ddos/reports/condense/svc.csv + hash: md5 + md5: 76b35c3e1dfa2d0476a737f9a41c25c4 + size: 3771755 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + 
lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/condense/svc.csv + hash: md5 + md5: 102b712883464d547a4d2119f6c5df60 + size: 2968961 + clean@ddos-condense/logistic: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/logistic.csv + -o ddos/plots/clean/condense/logistic.csv -c conf/clean.yaml + deps: + - path: ddos/reports/condense/logistic.csv + hash: md5 + md5: b24764aed957fdf6d2ccb541ef490d37 + size: 3150984 + params: + conf/clean.yaml: + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: Gzip + pkl: Pickle + bz2: BZ2 + zstd: Zstd + lzma: Lzma + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + outs: + - path: ddos/plots/clean/condense/logistic.csv + hash: md5 + md5: bfca6e865bca11a25fa1e42dfbdea0ad + size: 2331762 + merge_condense@ddos: + cmd: python merge.py --big_dir ddos/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + ddos/plots/ --output_file condensed_merged.csv + deps: + - path: ddos/plots/clean/condense/knn.csv + hash: md5 + md5: d214914ecfbba6afbd4ff9a61cb96bb1 + size: 3652514 + - path: ddos/plots/clean/condense/logistic.csv + hash: md5 + md5: bfca6e865bca11a25fa1e42dfbdea0ad + size: 2331762 + - path: ddos/plots/clean/condense/svc.csv + hash: md5 + md5: 102b712883464d547a4d2119f6c5df60 + size: 2968961 + outs: + - path: ddos/plots/condensed_merged.csv + hash: md5 + md5: dc147a2e9c585b39c5e212a46ade70ac + size: 9306964 + merge_condense@kdd_nsl: + cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv 
--output_folder + kdd_nsl/plots/ --output_file condensed_merged.csv + deps: + - path: kdd_nsl/plots/clean/condense/knn.csv + hash: md5 + md5: 23789b08b0fd1616555611d0e7971db9 + size: 1204868 + - path: kdd_nsl/plots/clean/condense/logistic.csv + hash: md5 + md5: 55a0ac50149a3e3d93b69c63ccd0d7a3 + size: 1174964 + - path: kdd_nsl/plots/clean/condense/svc.csv + hash: md5 + md5: c9b2ff8546f531fa439c664c63fc06fd + size: 2021393 + outs: + - path: kdd_nsl/plots/condensed_merged.csv + hash: md5 + md5: 1ddcee7de7db0c1a7d4898de4a03d7b7 + size: 4543759 + merge_condense@sms_spam: + cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + sms_spam/plots/ --output_file condensed_merged.csv + deps: + - path: sms_spam/plots/clean/condense/knn.csv + hash: md5 + md5: 7dda620e8ae59aab14ac83c0071a8b96 + size: 1268504 + - path: sms_spam/plots/clean/condense/logistic.csv + hash: md5 + md5: 1f89cfa87c87f195079e49eb5d6e7ce5 + size: 2461824 + - path: sms_spam/plots/clean/condense/svc.csv + hash: md5 + md5: 92b8648f6759e0a56c65aeec4a15aa92 + size: 1223675 + outs: + - path: sms_spam/plots/condensed_merged.csv + hash: md5 + md5: 8f549743001ca622a6c7c8cbb2b3d17d + size: 5114716 + merge_condense@truthseeker: + cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + truthseeker/plots/ --output_file condensed_merged.csv + deps: + - path: truthseeker/plots/clean/condense/knn.csv + hash: md5 + md5: a0c8deb8fe7617477ec43fae2a851b4d + size: 1191230 + - path: truthseeker/plots/clean/condense/logistic.csv + hash: md5 + md5: 9710addb440069a5ea884d90ed4c394a + size: 1237939 + - path: truthseeker/plots/clean/condense/svc.csv + hash: md5 + md5: a17c0cdb6a3fbfae5bd4fcfca1938a96 + size: 1257671 + outs: + - path: truthseeker/plots/condensed_merged.csv + hash: md5 + md5: 
738dc93bfff1b9c167949e722ee79665 + size: 3805499 + grid_search@300-ddos-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv + files.directory=ddos files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + 
++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: ddos/logs/gzip_knn/300 + hash: md5 + md5: 1e533c118406ca2ffae2b0a3e11a5035.dir + size: 1671182 + nfiles: 514 + - path: ddos/reports/gzip_knn/300/train/ + hash: md5 + md5: 000376454dd461f25065cdb093e78e7c.dir + size: 1461265 + nfiles: 1403 + plot_condense@sms_spam: + cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/condensed_merged.csv -c + conf/condensed_plots.yaml + deps: + - path: sms_spam/plots/condensed_merged.csv + hash: md5 + md5: 8f549743001ca622a6c7c8cbb2b3d17d + size: 5114716 + params: + conf/condensed_plots.yaml: + line_plot: + - file: sampling_method_vs_accuracy.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: accuracy + ylabel: Accuracy + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + y_scale: linear + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_train_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_predict_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: 
predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: sms_spam/plots/sampling_method_vs_accuracy.pdf + hash: md5 + md5: 8d3c7b03379f2f16bdb6de450083608b + size: 40643 + - path: sms_spam/plots/sampling_method_vs_predict_time.pdf + hash: md5 + md5: 095622e64533aedee66d72079f141c0d + size: 53902 + - path: sms_spam/plots/sampling_method_vs_train_time.pdf + hash: md5 + md5: da26bd3fc967c9925975f6c8ad189a88 + size: 50367 + plot_condense@ddos: + cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/condensed_merged.csv -c + conf/condensed_plots.yaml + deps: + - path: ddos/plots/condensed_merged.csv + hash: md5 + md5: dc147a2e9c585b39c5e212a46ade70ac + size: 9306964 + params: + conf/condensed_plots.yaml: + line_plot: + - file: sampling_method_vs_accuracy.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: accuracy + ylabel: Accuracy + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + y_scale: linear + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_train_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_predict_time.pdf + hue: model.init.sampling_method + title: + x: 
model.init.m + xlabel: Percentage of Samples per Class + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: ddos/plots/sampling_method_vs_accuracy.pdf + hash: md5 + md5: 09737e6b272979bf7fc879ece10d25e5 + size: 57907 + - path: ddos/plots/sampling_method_vs_predict_time.pdf + hash: md5 + md5: 78e2e0111219f86d189dfb952d81cdba + size: 78230 + - path: ddos/plots/sampling_method_vs_train_time.pdf + hash: md5 + md5: ab34ce0b71b6c0153525b0194178ecaf + size: 64512 + plot_condense@kdd_nsl: + cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/condensed_merged.csv -c + conf/condensed_plots.yaml + deps: + - path: kdd_nsl/plots/condensed_merged.csv + hash: md5 + md5: 1ddcee7de7db0c1a7d4898de4a03d7b7 + size: 4543759 + params: + conf/condensed_plots.yaml: + line_plot: + - file: sampling_method_vs_accuracy.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: accuracy + ylabel: Accuracy + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + y_scale: linear + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_train_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: 
sampling_method_vs_predict_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: kdd_nsl/plots/sampling_method_vs_accuracy.pdf + hash: md5 + md5: 1c673220cd32e3f9bd2aa92516d0b20e + size: 38546 + - path: kdd_nsl/plots/sampling_method_vs_predict_time.pdf + hash: md5 + md5: 4bcb086fcd47e05d2b79e30a12d15869 + size: 50187 + - path: kdd_nsl/plots/sampling_method_vs_train_time.pdf + hash: md5 + md5: 2b3e91d9b656ba35d06f8e97d1e8359d + size: 45992 + grid_search@300-ddos-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_logistic/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/300/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: ddos/logs/gzip_logistic/300 + hash: md5 + md5: ace39d7825de3ce5c0d678839c812ab6.dir + size: 1765030 + nfiles: 514 + - path: ddos/reports/gzip_logistic/300/train/ + hash: md5 + md5: 9f23532033970310bd5915d4018de935.dir + size: 1436932 + nfiles: 963 + grid_search@300-ddos-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + data.sample.test_size=100 
model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/300/study.csv + files.directory=ddos files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + 
verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: ddos/logs/gzip_svc/300 + hash: md5 + md5: 7681421b662e0a0690e9a1a6a4cf4b79.dir + size: 1710386 + nfiles: 514 + - path: ddos/reports/gzip_svc/300/train/ + hash: md5 + md5: c872a806e708289c65e6856bc2a057bf.dir + size: 1393355 + nfiles: 1045 + plot_condense@truthseeker: + cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/condensed_merged.csv -c + conf/condensed_plots.yaml + deps: + - path: truthseeker/plots/condensed_merged.csv + hash: md5 + md5: 738dc93bfff1b9c167949e722ee79665 + size: 3805499 + params: + conf/condensed_plots.yaml: + line_plot: + - file: sampling_method_vs_accuracy.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: accuracy + ylabel: Accuracy + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + y_scale: linear + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_train_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars + xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: sampling_method_vs_predict_time.pdf + hue: model.init.sampling_method + title: + x: model.init.m + xlabel: Percentage of Samples per Class + y: predict_time + ylabel: Prediction Time (s) + y_scale: log + hue_order: + - random + - svc + - knn + - sum + - medoid + - nearmiss + - hardness + errorbar: se + err_style: bars 
+ xlim: + - 0 + - 1 + legend: + title: Sampling Method + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + outs: + - path: truthseeker/plots/sampling_method_vs_accuracy.pdf + hash: md5 + md5: 0d293f64173585cb19c88218a7327f83 + size: 18158 + - path: truthseeker/plots/sampling_method_vs_predict_time.pdf + hash: md5 + md5: bb494d7b950451096bb639f3a9f1b4cb + size: 45092 + - path: truthseeker/plots/sampling_method_vs_train_time.pdf + hash: md5 + md5: 85a9eeb8f5aecc63f5634b12483941cf + size: 39796 + grid_search@500-ddos-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_logistic/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/500/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: ddos/logs/gzip_logistic/500 + hash: md5 + md5: afb6463625f139e82a88976c24b93f16.dir + size: 1791134 + nfiles: 514 + - path: ddos/reports/gzip_logistic/500/train/ + hash: md5 + md5: dbed10dfbc2747c79e14dcedcbce0661.dir + size: 968208 + nfiles: 702 + grid_search@500-ddos-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 + data.sample.test_size=100 
model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/500/study.csv + files.directory=ddos files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + 
verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: ddos/logs/gzip_svc/500 + hash: md5 + md5: 319357234ff9123f09bb6603fe74866f.dir + size: 1737584 + nfiles: 514 + - path: ddos/reports/gzip_svc/500/train/ + hash: md5 + md5: 63ecb36bf4e16027b60bcd2892330829.dir + size: 897567 + nfiles: 768 + grid_search@100-sms_spam-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100 + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: sms_spam/logs/gzip_logistic/100 + hash: md5 + md5: d1120618c5a674fe50c5717e2d71d640.dir + size: 1554813 + nfiles: 514 + - path: sms_spam/reports/gzip_logistic/100/train/ + hash: md5 + md5: 89f61791ac36513c4957057485a2e8e3.dir + size: 553318 + nfiles: 357 + grid_search@100-sms_spam-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: sms_spam/logs/gzip_svc/100 + hash: md5 + md5: cb8e4936d6ee03af99fa775d8b4b956b.dir + size: 1483653 + nfiles: 514 + - path: sms_spam/reports/gzip_svc/100/train/ + hash: md5 + md5: ae31535b48c489e3040a2836c43215a5.dir + size: 543085 + nfiles: 384 + grid_search@300-kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/300/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: kdd_nsl/logs/gzip_knn/300 + hash: md5 + md5: d3f58cbd5181a4f86ac660aba7173dfb.dir + size: 1437824 + nfiles: 514 + - path: kdd_nsl/reports/gzip_knn/300/train/ + hash: md5 + md5: d5317915e16e54a5fb4c82963cc0b058.dir + size: 825336 + nfiles: 612 + grid_search@300-kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/300 + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/300/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + 
prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: kdd_nsl/logs/gzip_logistic/300 + hash: md5 + md5: 6793362a9053b6f28647bb49875ebcf3.dir + size: 1634660 + nfiles: 514 + - path: kdd_nsl/reports/gzip_logistic/300/train/ + hash: md5 + md5: f2a46e55c8597a4d4082202f69186083.dir + size: 945424 + nfiles: 723 + grid_search@300-kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/300/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: kdd_nsl/logs/gzip_svc/300 + hash: md5 + md5: 1bd3b191acf0f78e361e1bc3cb6df928.dir + size: 1584389 + nfiles: 514 + - path: kdd_nsl/reports/gzip_svc/300/train/ + hash: md5 + md5: b6e64c8b751bf3a140aa9871f341a173.dir + size: 899234 + nfiles: 765 + grid_search@300-sms_spam-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/300/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + 
temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: sms_spam/logs/gzip_knn/300 + hash: md5 + md5: 09019492218a189aabe0601cb4c3f3a3.dir + size: 1460894 + nfiles: 514 + - path: sms_spam/reports/gzip_knn/300/train/ + hash: md5 + md5: 3aa09498a167a50051ee2fdf3e46d62d.dir + size: 364240 + nfiles: 349 + grid_search@300-sms_spam-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/300 + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/300/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: sms_spam/logs/gzip_logistic/300 + hash: md5 + md5: 627574a996abf0037be2b9d798c0a1f6.dir + size: 1593011 + nfiles: 514 + - path: sms_spam/reports/gzip_logistic/300/train/ + hash: md5 + md5: 886edc50f38dc580603074bf8dc46835.dir + size: 553839 + nfiles: 363 + grid_search@300-sms_spam-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=300 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/300/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: sms_spam/logs/gzip_svc/300 + hash: md5 + md5: 7d9d939af4228ad75b78ee5c347a984a.dir + size: 1513139 + nfiles: 514 + - path: sms_spam/reports/gzip_svc/300/train/ + hash: md5 + md5: cb8713e4f13494c3c1ab3c93c238d2d7.dir + size: 544369 + nfiles: 384 + grid_search@300-truthseeker-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/300/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: truthseeker/logs/gzip_knn/300 + hash: md5 + md5: 7fc2fb64903d90052db980e395a73a1b.dir + size: 1418937 + nfiles: 514 + - path: truthseeker/reports/gzip_knn/300/train/ + hash: md5 + md5: 1b7d0b73ddb24fa30f48675625cad64c.dir + size: 384561 + nfiles: 332 + grid_search@300-truthseeker-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker 
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/300 + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/300/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + 
n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: truthseeker/logs/gzip_logistic/300 + hash: md5 + md5: 121b624ea70d27aba89bd5448c35580f.dir + size: 1564349 + nfiles: 514 + - path: truthseeker/reports/gzip_logistic/300/train/ + hash: md5 + md5: 7dfeff37b85b221b60c7bad442f21658.dir + size: 557318 + nfiles: 367 + grid_search@300-truthseeker-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/300/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: truthseeker/logs/gzip_svc/300 + hash: md5 + md5: c1b03e3fa37ca812864d04d3a38216db.dir + size: 1536045 + nfiles: 514 + - path: truthseeker/reports/gzip_svc/300/train/ + hash: md5 + md5: 2cf3648372291b72f9b16020c5c3ad4e.dir + size: 548358 + nfiles: 384 + grid_search@500-ddos-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=ddos/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/500/study.csv + files.directory=ddos files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: 
/tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: ddos/logs/gzip_knn/500 + hash: md5 + md5: ebb76a3ffe046f5763072644ec826dd9.dir + size: 1693130 + nfiles: 514 + - path: ddos/reports/gzip_knn/500/train/ + hash: md5 + md5: 00682fbb7c897d179ed788f09be3b1e9.dir + size: 732559 + nfiles: 763 + grid_search@500-kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/500/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: kdd_nsl/logs/gzip_knn/500 + hash: md5 + md5: f1d5a2b6b59bc61a8c8d9c52d3a2ad11.dir + size: 1496906 + nfiles: 514 + - path: kdd_nsl/reports/gzip_knn/500/train/ + hash: md5 + md5: bffa17c78573257f1d85dccf5d93fade.dir + size: 388686 + nfiles: 335 + grid_search@500-kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/500 + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/500/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + 
prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: kdd_nsl/logs/gzip_logistic/500 + hash: md5 + md5: 44795a3a64e10088623faf15b87a4548.dir + size: 1666384 + nfiles: 514 + - path: kdd_nsl/reports/gzip_logistic/500/train/ + hash: md5 + md5: 607cd0515dec2502b0bd11b6480b5d7b.dir + size: 565896 + nfiles: 357 + grid_search@500-kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=kdd_nsl/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/500/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: kdd_nsl/logs/gzip_svc/500 + hash: md5 + md5: 1ed2e3d83e888471981684eaaa3f3b8e.dir + size: 1613038 + nfiles: 514 + - path: kdd_nsl/reports/gzip_svc/500/train/ + hash: md5 + md5: c53dae7497a8f55965cc708c28280f4e.dir + size: 555797 + nfiles: 384 + grid_search@500-sms_spam-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/500/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + 
temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: sms_spam/logs/gzip_knn/500 + hash: md5 + md5: 0e5c9c1b5970ef63e76b3adcbb1d9bde.dir + size: 1465483 + nfiles: 514 + - path: sms_spam/reports/gzip_knn/500/train/ + hash: md5 + md5: dd14847ddf87817f4410aea70b8fdce3.dir + size: 378991 + nfiles: 331 + grid_search@500-sms_spam-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/500 + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/500/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: sms_spam/logs/gzip_logistic/500 + hash: md5 + md5: 6e6d0761de2d778fbdbebd1d547f04a1.dir + size: 1619183 + nfiles: 514 + - path: sms_spam/reports/gzip_logistic/500/train/ + hash: md5 + md5: fb78d7f4f526194a09b6561a121f734e.dir + size: 553072 + nfiles: 361 + grid_search@500-sms_spam-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=500 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=sms_spam/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/500/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: sms_spam/logs/gzip_svc/500 + hash: md5 + md5: 4b37a4947b8a27e8b050b76a2252f6d2.dir + size: 1542505 + nfiles: 514 + - path: sms_spam/reports/gzip_svc/500/train/ + hash: md5 + md5: adfaa61acf833b9b2d823fd944876030.dir + size: 543664 + nfiles: 384 + grid_search@500-truthseeker-gzip_knn: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/500/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: a58015cd6f327e171842b045a2524bfd + size: 2062 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 2 + n_jobs: 2 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + ++data.sample.random_state: int(interval(1, 10000)) + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_knn + outs: + - path: truthseeker/logs/gzip_knn/500 + hash: md5 + md5: 8f89bb6eee2faa7d319f0e667a455558.dir + size: 1449788 + nfiles: 514 + - path: truthseeker/reports/gzip_knn/500/train/ + hash: md5 + md5: 22ad9cc6a9f1fc454ff08e23e1194b6a.dir + size: 382020 + nfiles: 333 + grid_search@500-truthseeker-gzip_logistic: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker 
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/500 + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/500/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml + hash: md5 + md5: 847d4d804fff0b6f2533f90820eebd04 + size: 2205 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 1 + n_trials: 1 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1,l2,none + +model.init.tol: 1e-4,1e-3,1e-2 + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + 
n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_logistic + outs: + - path: truthseeker/logs/gzip_logistic/500 + hash: md5 + md5: 536a09eb3f82d03737e3cec6aafdbac8.dir + size: 1605851 + nfiles: 514 + - path: truthseeker/reports/gzip_logistic/500/train/ + hash: md5 + md5: 4560cd0abd0609eebe34c6f578d77f2d.dir + size: 556183 + nfiles: 375 + grid_search@500-truthseeker-gzip_svc: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 + hydra.sweep.dir=truthseeker/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/500/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml + hash: md5 + md5: 957922cb6993eb99866232d944a4a106 + size: 2131 + - path: params.yaml + hash: md5 + md5: 8be0cf0b5f453ffb12b19a1bf1af6468 + size: 1435 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + seed: 123 + consider_prior: true + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: false + n_startup_trials: 10 + n_ei_candidates: 24 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_jobs: 2 + n_trials: 2 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model.init.symmetric: True,False + ++model.init.precompute: true + model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name: ${model_name} + data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + model_name: gzip_svc + outs: + - path: truthseeker/logs/gzip_svc/500 + hash: md5 + md5: 10808502e0c1c7d780ea6178ae53c19c.dir + size: 1568093 + nfiles: 514 + - path: truthseeker/reports/gzip_svc/500/train/ + hash: md5 + md5: 1fb9105254065d6d93e9647e12d650b2.dir + size: 547905 + nfiles: 384 diff --git a/examples/gzip/dvc.yaml b/examples/gzip/dvc.yaml new file mode 100644 index 00000000..b7d4c8d6 --- /dev/null +++ b/examples/gzip/dvc.yaml @@ -0,0 +1,439 @@ +vars: + - conf/default.yaml:hydra + - conf/plots.yaml:line_plot + - 
conf/plots.yaml:cat_plot + # - conf/condensed_plots.yaml:line_plot + # - conf/clean.yaml:params + # - conf/clean.yaml:fillna + - conf/clean.yaml:replace +stages: + ############################################################################## + # These stages use the dvc API to run a single experiment at a time using a + # deckard.Experiment object. This parses this file, saves the default hydra configuration + # to params.yaml, and then runs the experiment with the given parameters. + # This can be used to define a stage that runs a single experiment, or a stage for the + # optimisation using the optimise.py script and the hydrasweeper API. This API is primarily used for + # development and testing, as it is easier to run a single experiment at a time and debug it using + # DVC's git-like features to track changes and minimise run time. + + # This stage will parse the conf/default.yaml file and save it to params.yaml + # In addition, it will define a schema stage that will: + # 1. Determine the file paths for the data and model files (if specified) + # 2. Run the experiment with the given parameters + # 3. Save the results to the given file paths (will always save a score_dict_file, and a params_file to files.directory/files.reports/stage/) + # 4. Save a 'params.yaml' file with the scores (Always) + # 5. Save the predictions to the given file paths (if specified) + # 6. Save the probabilities to the given file paths (if specified) + # 7. Save the (final) losses to the given file paths (if specified) + # 8. Save the train/test labels to the given file paths (if specified) + + # You can arbitrarily define parameters in the 'conf' folder. + # To parse the parameters, you can use the 'hydra' API to define a schema for the parameters + # Or run `deckard.layers.parse` to parse the parameters and save them to a file.
+ ############################################################################## + data: + desc: "This stage will parse the conf/default.yaml file and save it to params.yaml" + cmd: python data_prep.py + outs: + - raw_data/ + deps: + - data_prep.py + parse_params: + cmd: python -m deckard.layers.parse + deps: + - conf/default.yaml + - conf/data/default.yaml + - conf/model/default.yaml + - conf/files/default.yaml + - conf/scorers/default.yaml + outs: + - params.yaml: + cache: true + desc : "Parsed parameters for the experiment" + persist: true + push : true + + train: + cmd: python -m deckard.layers.experiment train + metrics: + - ${files.directory}/${files.reports}/train/${files.name}/${files.score_dict_file} + outs: + - ${files.directory}/${files.reports}/train/${files.name}/${files.predictions_file} + params: + - data + - model + - scorers + - files + - dataset + - model_name + - device_id + deps: + - params.yaml + - raw_data/ # Raw data + ############################################################################## + test_each_dataset: + matrix: + dataset : [ddos, truthseeker, sms_spam, kdd_nsl] + model_name : [gzip_knn, gzip_svc, gzip_logistic] + cmd : >- + python -m deckard.layers.optimise + stage=train + files.name=${item.model_name} + data.sample.train_size=100 + files.directory=${item.dataset} + data=${item.dataset} + dataset=${item.dataset} + model_name=${item.model_name} + model=${item.model_name} + hydra.run.dir=${item.dataset}/logs/train/${item.model_name} + ++raise_exception=True + deps: + - params.yaml + - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} + outs: + - ${item.dataset}/${files.reports}/train/${item.model_name}/${files.score_dict_file} + - ${item.dataset}/logs/train/${item.model_name} + params: + - data + - model + - scorers + - files + - dataset + - model_name + - device_id + ############################################################################## + test_each_metric: + matrix: + metric: [gzip, zstd, pkl, 
bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio] + model : [gzip_knn,] # gzip_svc, gzip_logistic + dataset : [kdd_nsl] #truthseeker, sms_spam, ddos + train_size: [20] #100, 1000, 10000 + cmd : >- + python -m deckard.layers.optimise + stage=test_each_metric + files.name=${item.model}/${item.metric}/${item.train_size} + files.directory=${item.dataset} + data=${item.dataset} + data.sample.train_size=${item.train_size} + dataset=${item.dataset} + model=${item.model} + model_name=${model_name} + model.init.metric=${item.metric} + model.init.m=-1 + hydra.run.dir=${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size} + ++raise_exception=True + deps: + - params.yaml + - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} + outs: + - ${item.dataset}/${files.reports}/test_each_metric/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file} + - ${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size} + params: + - data + - model + - scorers + - files + - dataset + - model_name + - device_id + # ############################################################################## + test_each_model: + matrix: + metric: [gzip] #, zstd, pkl, bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio + model : [gzip_knn, gzip_svc, gzip_logistic] + dataset : [kdd_nsl] #truthseeker, sms_spam, ddos + train_size: [20] #100, 1000, 10000 + cmd : >- + python -m deckard.layers.optimise + stage=test_each_model + files.name=${item.model}/${item.metric}/${item.train_size} + files.directory=${item.dataset} + data=${item.dataset} + data.sample.train_size=${item.train_size} + dataset=${item.dataset} + model=${item.model} + model_name=${model_name} + model.init.metric=${item.metric} + model.init.m=-1 + hydra.run.dir=${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size} + ++raise_exception=True + deps: + - params.yaml + - 
${files.directory}/${files.reports}/train/default/${files.score_dict_file} + outs: + - ${item.dataset}/${files.reports}/test_each_model/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file} + - ${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size} + params: + - data + - model + - scorers + - files + - dataset + - model_name + - device_id + ############################################################################## + grid_search: + matrix: + train_size: [20, 100, 300, 500] # + dataset : [ddos, kdd_nsl, sms_spam, truthseeker] # + configs: [gzip_knn, gzip_logistic, gzip_svc] + cmd: >- + python -m deckard.layers.optimise + stage=train + data=${item.dataset} + dataset=${item.dataset} + data.sample.train_size=${item.train_size} + data.sample.test_size=100 + model_name=${item.configs} + model.init.distance_matrix=null + hydra.sweeper.study_name=${item.configs}_${item.dataset} + hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 + hydra.sweep.dir=${item.dataset}/logs/${item.configs}/${item.train_size} + hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.configs}/${item.train_size}/study.csv + files.directory=${item.dataset} + files.reports=${files.reports}/${item.configs}/${item.train_size} + hydra.launcher.n_jobs=-1 + ++raise_exception=True + --config-name ${item.configs} + --multirun + deps: + - params.yaml + - conf/${item.configs}.yaml + outs: + - ${item.dataset}/logs/${item.configs}/${item.train_size} + - ${item.dataset}/${files.reports}/${item.configs}/${item.train_size}/train/: + cache: true + persist: true + push: true + params: + - conf/${item.configs}.yaml: + - hydra + - model_name + ############################################################################## + # find_best_model: # This isn't actually used in later steps, but it's handy to have these configs ready for a line search instead of a massive grid search + # matrix: + # dataset : [ddos, kdd_nsl, sms_spam,] # + # model : [gzip_knn, 
gzip_svc, gzip_logistic] + # deps: + # - ${item.dataset}/logs/${item.model}/ + # cmd: >- + # python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name ${item.model}_${item.dataset} --config_subdir model --params_file best_${item.model}_${item.dataset} --default_config ${item.model} + # outs: + # - conf/model/best_${item.model}_${item.dataset}.yaml + ############################################################################# + test_each_method: + matrix: + dataset : [ddos] # kdd_nsl, truthseeker, sms_spam, + method: [medoid, sum, svc, hardness, nearmiss,random,knn] + cmd : >- + python -m deckard.layers.optimise + stage=train + +model.init.sampling_method=${item.method} + model.init.m=3 + data.sample.train_size=100 + files.name=${item.method} + files.directory=${item.dataset} + data=${item.dataset} + dataset=${item.dataset} + model_name=${item.method} + hydra.run.dir=${item.dataset}/logs/method/${item.method} + ++raise_exception=True + deps: + - params.yaml + - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} + outs: + - ${item.dataset}/${files.reports}/train/${item.method}/${files.score_dict_file} + - ${item.dataset}/logs/method/${item.method} + params: + - data + - model + - scorers + - files + - dataset + - model_name + - device_id + ############################################################################## + condense: + matrix: + dataset : [ddos, kdd_nsl, truthseeker, sms_spam,] # kdd_nsl, truthseeker, sms_spam, + model_name : [knn, svc, logistic] + deps: + - params.yaml + - conf/condense_${item.model_name}.yaml + cmd: >- + python -m deckard.layers.optimise + stage=train + data=${item.dataset} + dataset=${item.dataset} + data.sample.train_size=100 + data.sample.test_size=100 + model_name=condensed_${item.model_name} + model=gzip_${item.model_name} + files.directory=${item.dataset} + files.reports=${files.reports}/condense/${item.model_name}/ + hydra.sweeper.study_name=condense_${item.model_name}_${item.dataset} 
+ hydra.sweeper.n_trials=1024 + hydra.sweeper.n_jobs=8 + hydra.sweep.dir=${item.dataset}/logs/condense/${item.model_name}/ + hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.model_name}/study.csv + hydra.launcher.n_jobs=-1 + --config-name condense_${item.model_name} + --multirun + outs: + - ${item.dataset}/logs/condense/${item.model_name}/ + - ${item.dataset}/${files.reports}/condense/${item.model_name}/: + cache: true + persist: true + push: true + params: + - conf/condense.yaml: + - hydra + compile: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic] + deps: + - ${item.dataset}/${files.reports}/${item.stage}/ + outs: + - ${item.dataset}/${files.reports}/${item.stage}.csv + cmd: >- + python -m deckard.layers.compile + --report_folder ${item.dataset}/${files.reports}/${item.stage} + --results_file ${item.dataset}/${files.reports}/${item.stage}.csv + ############################################################################## + clean: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic] + deps: + - ${item.dataset}/${files.reports}/${item.stage}.csv + cmd: >- + python -m deckard.layers.clean_data + -i ${item.dataset}/${files.reports}/${item.stage}.csv + -o ${item.dataset}/plots/clean/${item.stage}.csv + -c conf/clean.yaml + outs: + - ${item.dataset}/plots/clean/${item.stage}.csv + params: + - conf/clean.yaml: + - replace + ############################################################################## + merge: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + deps: + - ${item.dataset}/plots/clean/gzip_knn.csv + - ${item.dataset}/plots/clean/gzip_logistic.csv + - ${item.dataset}/plots/clean/gzip_svc.csv + cmd: >- + python merge.py + --big_dir ${item.dataset}/plots/ + --data_file clean/gzip_knn.csv + --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv + 
--output_folder ${item.dataset}/plots + --output_file merged.csv + outs: + - ${item.dataset}/plots/merged.csv + ############################################################################## + merge_condense: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + deps: + - ${item.dataset}/plots/clean/condense/knn.csv + - ${item.dataset}/plots/clean/condense/logistic.csv + - ${item.dataset}/plots/clean/condense/svc.csv + cmd: >- + python merge.py + --big_dir ${item.dataset}/plots/ + --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv + --output_folder ${item.dataset}/plots/ + --output_file condensed_merged.csv + outs: + - ${item.dataset}/plots/condensed_merged.csv + ############################################################################## + plot: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + cmd: >- + python -m deckard.layers.plots + --path ${item.dataset}/plots/ + --file ${item.dataset}/plots/merged.csv + -c conf/plots.yaml + deps: + - ${item.dataset}/plots/merged.csv + plots: + - ${item.dataset}/plots/${line_plot[0].file} + - ${item.dataset}/plots/${line_plot[1].file} + - ${item.dataset}/plots/${line_plot[2].file} + - ${item.dataset}/plots/${cat_plot[0].file} + - ${item.dataset}/plots/${cat_plot[1].file} + - ${item.dataset}/plots/${cat_plot[2].file} + - ${item.dataset}/plots/${cat_plot[3].file} + - ${item.dataset}/plots/${cat_plot[4].file} + params: + - conf/plots.yaml: + - line_plot + - cat_plot + ############################################################################## + plot_condense: + matrix: + dataset : [kdd_nsl, sms_spam, ddos] + cmd: >- + python -m deckard.layers.plots + --path ${item.dataset}/plots/ + --file ${item.dataset}/plots/condensed_merged.csv + -c conf/condensed_plots.yaml + deps: + - ${item.dataset}/plots/condensed_merged.csv + plots: + - ${item.dataset}/plots/sampling_method_vs_accuracy.pdf + - ${item.dataset}/plots/sampling_method_vs_train_time.pdf + - 
${item.dataset}/plots/sampling_method_vs_predict_time.pdf + params: + - conf/condensed_plots.yaml: + - line_plot + # copy: + # matrix: + # dataset : [kdd_nsl, truthseeker, sms_spam, ddos] + # cmd: >- + # rm -rf ~/Gzip-KNN/figs/${item.dataset}/ && + # mkdir -p ~/Gzip-KNN/figs/${item.dataset}/ && + # cp -r ${item.dataset}/plots/* ~/Gzip-KNN/figs/${item.dataset}/ + # deps: + # - ${item.dataset}/plots/ + # ############################################################################## + # # attack: + # # cmd: python -m deckard.layers.experiment attack + # # deps: + # # - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type} + # # - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} + # # metrics: + # # - ${files.directory}/${files.reports}/attack/${files.name}/${files.score_dict_file} + # # outs: + # # - ${files.directory}/${files.reports}/attack/${files.name}/${files.adv_probabilities_file} + # # params: + # # - data + # # - model + # # - attack + # # - scorers + # # - files + # ############################################################################## + # # attack_optimise: + # # cmd: python -m deckard.layers.optimise +stage=attack +optimizers=adv_accuracy model=best --multirun --config-name attack + # # deps: + # # - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type} + # # - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} + # # - conf/model/best.yaml + # # outs: + # # - attack.db + # # params: + # # - conf/attack.yaml: + # # - hydra + # # find_best_attack: + # # cmd: python -m deckard.layers.find_best attack.yaml + # # deps: + # # - attack.db + # # outs: + # # - conf/attack/best.yaml diff --git a/examples/gzip/gzip_classifier.py b/examples/gzip/gzip_classifier.py new file mode 100644 index 00000000..49d4e159 --- /dev/null +++ b/examples/gzip/gzip_classifier.py @@ -0,0 +1,1051 @@ +#!/usr/bin/env python3 +""" +This is a module that implements a gzip classifier.
You can test it by running the following command: +python -m gzip_classifier --compressor gzip --k 3 --m 100 --method random --distance_matrix None --dataset 20newsgroups +""" +# These lines will be used to setup a virtual environment inside the current working directory in a folder called env +# You might need to install venv with: +# sudo apt-get install python3-venv +# python3 -m pip install venv +# python3 -m venv env +# source env/bin/activate +# run `deactivate` to exit the virtual environment +# These lines will be used to install the dependencies needed for this file +# You might need to install pip with: +# sudo apt-get install python3-pip +# python -m pip install numpy scikit-learn tqdm scikit-learn-extra pandas imbalanced-learn + +import numpy as np +import gzip +from tqdm import tqdm +from pathlib import Path +import logging +import time +import argparse +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.multiclass import unique_labels +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split +from sklearn.datasets import fetch_20newsgroups, make_classification +from sklearn.preprocessing import LabelBinarizer, LabelEncoder +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC +from sklearn.linear_model import LogisticRegression +from sklearn_extra.cluster import KMedoids +from imblearn.under_sampling import ( + CondensedNearestNeighbour, + NearMiss, + InstanceHardnessThreshold, +) +from Levenshtein import distance, ratio, hamming, jaro, jaro_winkler, seqratio +import pandas as pd + +from joblib import Parallel, delayed +from typing import Literal + +from batchMixin import BatchedMixin + +logger = logging.getLogger(__name__) + + +def _gzip_compressor(x): + return len(gzip.compress(str(x).encode())) + + +def _lzma_compressor(x): + import lzma + + return len(lzma.compress(str(x).encode())) + + +def 
_bz2_compressor(x): + import bz2 + + return len(bz2.compress(str(x).encode())) + + +def _zstd_compressor(x): + import zstd + + return len(zstd.compress(str(x).encode())) + + +def _pickle_compressor(x): + import pickle + + return len(pickle.dumps(x)) + + +compressors = { + "gzip": _gzip_compressor, + "lzma": _lzma_compressor, + "bz2": _bz2_compressor, + "zstd": _zstd_compressor, + "pkl": _pickle_compressor, +} + + +def ncd( + x1, + x2, + cx1=None, + cx2=None, + method: Literal["gzip", "lzma", "bz2", "zstd", "pkl", None] = "gzip", +) -> float: + """ + Calculate the normalized compression distance between two objects treated as strings. + Args: + x1 (str): The first object + x2 (str): The second object + Returns: + float: The normalized compression distance between x1 and x2 + """ + + compressor = ( + compressors[method] if method in compressors.keys() else compressors["gzip"] + ) + x1 = str(x1) + x2 = str(x2) + Cx1 = compressor(x1) if cx1 is None else cx1 + Cx2 = compressor(x2) if cx2 is None else cx2 + x1x2 = " ".join([x1, x2]) + Cx1x2 = compressor(x1x2) + min_ = min(Cx1, Cx2) + max_ = max(Cx1, Cx2) + ncd = (Cx1x2 - min_) / max_ + return ncd + + +string_metrics = { + "levenshtein": distance, + "ratio": ratio, + "hamming": hamming, + "jaro": jaro, + "jaro_winkler": jaro_winkler, + "seqratio": seqratio, +} + +all_metrics = { + **compressors, + **string_metrics, +} + + +def _calculate_string_distance(x1, x2, method): + x1 = str(x1) + x2 = str(x2) + if method in string_metrics.keys(): + dist = string_metrics[method] + else: + raise NotImplementedError( + f"Method {method} not supported. Supported methods are: {string_metrics.keys()}", + ) + return dist(x1, x2) + + +class GzipClassifier(ClassifierMixin, BaseEstimator): + """An example classifier which implements a 1-NN algorithm. + + For more information regarding how to build your own classifier, read more + in the :ref:`User Guide `. + + Parameters + ---------- + k : int, default=3 + The number of neighbors to use. 
+ m: int, default=0 + The number of best samples to use. If -1, all samples will be used. + compressor: str, default="gzip" + The name of the compressor to use. Choices are + sampling_method: str, default="random" + The method used to select the best training samples. Choices are "sum", "mean", "medoid", "random", "knn", "svc". + metric: str, default="gzip" + The metric used to calculate the distance between samples. Choices are "gzip", "lzma", "bz2", "zstd", "pkl", "levenshtein", "ratio", "seqratio", "hamming", "jaro", "jaro_winkler". + distance_matrix: str or np.ndarray, default=None + The path to a numpy file or a numpy array representing the distance matrix. If a path is provided, the file will be loaded. If an array is provided, it will be used directly. Default is None. + Attributes + ---------- + X_ : ndarray, shape (n_samples, n_features) + The input passed during :meth:`fit`. + y_ : ndarray, shape (n_samples,) + The labels passed during :meth:`fit`. + classes_ : ndarray, shape (n_classes,) + The classes seen at :meth:`fit`. + distance_matrix_ : ndarray, shape (n_samples, n_samples) + """ + + def __init__( + self, + m=0, + sampling_method="random", + distance_matrix=None, + metric="gzip", + symmetric=False, + precompute=True, + **kwargs, + ): + """ + Initialize the GzipClassifier object. + + Args: + k (int): The value of k for k-nearest neighbors. Default is 3. + m (int): The value of m for m-best samples. Default is 0. A value of -1 indicates using all training samples. + sampling_method (str): The method used for classification. Default is "random". + metric (str): The metric used to calculate the distance between samples. Default is "gzip". + distance_matrix (str or np.ndarray): The path to a numpy file or a numpy array representing the distance matrix. + If a path is provided, the file will be loaded. If an array is provided, it will be used directly. + Default is None. + symmetric (bool): If True, the distance matrix will be treated as symmetric. Default is False.
+ precompute (bool): If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_. + + Raises: + ValueError: If distance_matrix is not a path to a numpy file or a numpy array. + NotImplementedError: If the metric is not supported. + """ + kwarg_string = str([f"{key}={value}" for key, value in kwargs.items()]) + logger.info( + f"Initializing GzipClassifier with m={m}, method={sampling_method}, distance_matrix={distance_matrix}, metric={metric}, symmetric={symmetric}, precompute={precompute}, {kwarg_string}", + ) + self.m = m + self.sampling_method = sampling_method + if metric in compressors.keys(): + logger.info(f"Using NCD metric with {metric} compressor.") + self._distance = ncd + self.metric = metric + elif metric in string_metrics.keys(): + logger.info(f"Using {metric} metric") + self._distance = _calculate_string_distance + self.metric = metric + else: + raise NotImplementedError( + f"Metric {metric} not supported. Supported metrics are: ncd, {string_metrics.keys()} and {compressors.keys()}", + ) + + self.symmetric = symmetric + if self.symmetric is True: + self._calculate_distance_matrix = ( + self._calculate_lower_triangular_distance_matrix + ) + else: + self._calculate_distance_matrix = ( + self._calculate_rectangular_distance_matrix + ) + self.precompute = precompute # If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_. 
+ self.distance_matrix = distance_matrix + for key, value in kwargs.items(): + setattr(self, key, value) + + def _calculate_rectangular_distance_matrix( + self, + x1, + x2, + Cx1=None, + Cx2=None, + n_jobs=-1, + ): + """ + Calculate the distance matrix between two sets of objects, treating them as strings, assuming d(a,b) != d(b,a) + Args: + x1 (np.ndarray): The first set of objects + x2 (np.ndarray): The second set of objects + Returns: + np.ndarray: The distance matrix of size (len(x1), len(x2)) + """ + matrix_ = np.zeros((len(x1), len(x2))) + pbar = tqdm( + total=len(x1), + desc="Calculating asymmetric distance matrix.", + leave=False, + dynamic_ncols=True, + ) + Cx1 = Cx1 if Cx1 is not None else [None] * len(x1) + Cx2 = Cx2 if Cx2 is not None else [None] * len(x2) + for i in range(len(x1)): + # Parallelize the calculation of the distance matrix + if self.metric in compressors.keys(): + matrix_[i, :] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)( + x1[i], + x2[j], + cx1=Cx1[i], + cx2=Cx2[j], + method=self.metric, + ) + for j in range(len(x2)) + ) + else: + matrix_[i, :] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)(x1[i], x2[j], method=self.metric) + for j in range(len(x2)) + ) + pbar.update(1) + pbar.close() + assert matrix_.shape == ( + len(x1), + len(x2), + ), f"Expected {matrix_.shape} == ({len(x1)}, {len(x2)})" + return matrix_ + + def _calculate_lower_triangular_distance_matrix( + self, + x1, + x2, + Cx1=None, + Cx2=None, + n_jobs=-1, + ): + """ + Calculate the distance matrix between two sets of objects, treating them as strings. 
Assuming the d(a,b) = d(b,a) + Args: + x1 (np.ndarray): The first set of objects + x2 (np.ndarray): The second set of objects + Returns: + np.ndarray: The distance matrix of size (len(x1), len(x2)) + """ + + matrix_ = np.zeros((len(x1), len(x2))) + pbar = tqdm( + total=len(x1), + desc="Calculating symmetric distance metrix.", + leave=False, + dynamic_ncols=True, + ) + Cx1 = Cx1 if Cx1 is not None else [None] * len(x1) + Cx2 = Cx2 if Cx2 is not None else [None] * len(x2) + for i in range(len(x1)): + # Parallelize the calculation of the distance matrix + if self.metric in compressors.keys(): + matrix_[i, :i] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)( + x1[i], + x2[j], + cx1=Cx1[i], + cx2=Cx2[j], + method=self.metric, + ) + for j in range(i) + ) + else: + matrix_[i, :i] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)(x1[i], x2[j], method=self.metric) + for j in range(i) + ) + # Copy the lower triangular part to the upper triangular part + matrix_[i, :i] = matrix_[:i, i] + pbar.update(1) + pbar.close() + assert matrix_.shape == ( + len(x1), + len(x2), + ), f"Expected {matrix_.shape} == ({len(x1)}, {len(x2)})" + return matrix_ + + def calculate_upper_triangular_distance_matrix( + self, + x1, + x2, + Cx1=None, + Cx2=None, + n_jobs=-1, + ): + matrix_ = np.zeros((len(x1), len(x2))) + pbar = tqdm( + total=len(x1), + desc="Calculating symmetric distance metrix.", + leave=False, + dynamic_ncols=True, + ) + Cx1 = Cx1 if Cx1 is not None else [None] * len(x1) + Cx2 = Cx2 if Cx2 is not None else [None] * len(x2) + for i in range(len(x1)): + if self.metric in compressors.keys(): + matrix_[i, i:] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)( + x1[i], + x2[j], + cx1=Cx1[i], + cx2=Cx2[j], + method=self.metric, + ) + for j in range(i, len(x2)) + ) + else: + matrix_[i, i:] = Parallel(n_jobs=n_jobs)( + delayed(self._distance)(x1[i], x2[j], method=self.metric) + for j in range(i, len(x2)) + ) + # copy the upper triangular part to the lower triangular part + 
matrix_[i, i:] = matrix_[i:, i] + pbar.update(1) + pbar.close() + assert matrix_.shape == ( + len(x1), + len(x2), + ), f"Expected {matrix_.shape} == ({len(x1)}, {len(x2)})" + return matrix_ + + def _load_distance_matrix(self, path): + if Path(path).exists(): + return np.load(path, allow_pickle=True)["X"] + else: + raise FileNotFoundError(f"Distance matrix file {path} not found") + + def _save_distance_matrix(self, path, matrix): + Path(path).parent.mkdir(parents=True, exist_ok=True) + np.savez_compressed(path, X=matrix) + + def _prepare_training_matrix(self, n_jobs=-1): + """ + Prepare the distance matrix for classification. + If self.distance_matrix is a path to a numpy file, it will be loaded. + If it is a numpy array, it will be used directly. + If it is None, the distance matrix will be calculated using self.X_ and self.X_. + """ + + if ( + isinstance(self.distance_matrix, str) + and Path(self.distance_matrix).exists() + ): + distance_matrix = self._load_distance_matrix(self.distance_matrix) + elif ( + isinstance(self.distance_matrix, str) + and not Path(self.distance_matrix).exists() + ): + distance_matrix = self._calculate_distance_matrix( + self.X_, + self.X_, + Cx1=self.Cx_, + Cx2=self.Cx_, + n_jobs=n_jobs, + ) + self._save_distance_matrix(self.distance_matrix, distance_matrix) + elif isinstance(self.distance_matrix, np.ndarray): + distance_matrix = self.distance_matrix + elif isinstance(self.distance_matrix, type(None)): + distance_matrix = self._calculate_distance_matrix( + self.X_, + self.X_, + Cx1=self.Cx_, + Cx2=self.Cx_, + n_jobs=n_jobs, + ) + else: + raise ValueError( + f"distance_matrix must be a path to a numpy file or a numpy array, got {type(self.distance_matrix)}", + ) + return distance_matrix + + def _find_best_samples(self, method="medoid", n_jobs=-1): + """ + Args: + method (str): The method used to select the best training samples. Default is "medoid". Choices are "sum", "mean", "medoid", "random", "knn", "svc". 
+ Returns: + list: The indices of the best training samples. + """ + self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) + assert isinstance( + self.distance_matrix, + np.ndarray, + ), f"Expected {type(self.distance_matrix)} to be np.ndarray" + distance_matrix = self.distance_matrix + indices = [] + if isinstance(self.m, float): + m = int(self.m * len(self.X_) / self.n_classes_) + if m == 0: + m = 1 + else: + m = self.m + y = self.y_ + n_classes = len(unique_labels(y)) + if method in ["sum", "medoid", "svc", "random"]: + if method == "sum": + for label in np.unique(y): + label_idx = np.where(y == label)[0] + label_distance_matrix = distance_matrix[label_idx, :] + summed_matrix = np.sum(label_distance_matrix, axis=0) + sorted_idx = np.argsort(summed_matrix) + indices.extend(sorted_idx[:m]) + elif method == "medoid": + for label in np.unique(y): + label_idx = np.where(y == label)[0] + min_ = min(m, len(label_idx)) + label_distance_matrix = distance_matrix[label_idx, :][:, label_idx] + kmedoids = KMedoids(n_clusters=min_, metric="precomputed").fit( + label_distance_matrix, + ) + indices.extend(kmedoids.medoid_indices_[:m]) + elif method == "svc": + svc = SVC(kernel="precomputed").fit(distance_matrix, y) + support_idx = svc.support_ + summed_matrix = np.sum(distance_matrix, axis=0) + sorted_idx = np.argsort(summed_matrix[support_idx])[ + ::-1 + ] # Sort in descending order + indices.extend(sorted_idx[: m * n_classes]) + elif method == "random": + keys = np.unique(y) + values = [m] * len(keys) + dict_ = dict(zip(keys, values)) + for label in np.unique(y): + label_idx = np.where(y == label)[0] + if len(label_idx) < m: + random_idx = np.random.choice(label_idx, m, replace=True) + else: + random_idx = np.random.choice(label_idx, m, replace=False) + indices.extend(random_idx) + else: + raise NotImplementedError(f"Method {method} not supported") + elif method in ["hardness", "nearmiss", "knn"]: + if method == "hardness": + keys = np.unique(y) + values = [m] * 
len(keys) + dict_ = dict(zip(keys, values)) + model = InstanceHardnessThreshold(sampling_strategy=dict_) + elif method == "nearmiss": + keys = np.unique(y) + values = [m] * len(keys) + dict_ = dict(zip(keys, values)) + model = NearMiss(sampling_strategy=dict_) + elif method == "knn": + distance_matrix = pd.DataFrame( + distance_matrix, + columns=range(len(distance_matrix)), + ) + y = pd.DataFrame(y, columns=["y"]) + y.index = list(range(len(y))) + model = CondensedNearestNeighbour(sampling_strategy="not majority") + else: + raise NotImplementedError(f"Method {method} not supported") + distance_matrix = pd.DataFrame( + distance_matrix, + columns=list(range(len(distance_matrix))), + ) + y = pd.DataFrame(y, columns=["y"]) + y.index = list(range(len(y))) + distance_matrix, y = model.fit_resample(distance_matrix, y) + indices = y.index[: m * n_classes] + else: + raise NotImplementedError(f"Method {method} not supported") + return indices + + def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1): + """Fit the model using X as training data and y as target values. If self.m is not -1, the best m samples will be selected using the method specified in self.sampling_method. 
+ + Args: + X (np.ndarray): The input data + y (np.ndarray): The target labels + + Returns: + GzipClassifier: The fitted model + """ + assert len(X) == len(y), f"Expected {len(X)} == {len(y)}" + logger.info(f"Fitting with X of shape {X.shape} and y of shape {y.shape}") + self.X_ = np.array(X) if not isinstance(X, np.ndarray) else X + y = np.array(y) if not isinstance(y, np.ndarray) else y + if len(np.squeeze(y).shape) == 1: + encoder = LabelBinarizer() + self.y_ = encoder.fit_transform(y) + self.n_classes_ = len(unique_labels(y)) + flat_y = np.squeeze(y).astype(int) + else: + self.y_ = y + self.n_classes_ = y.shape[1] + flat_y = np.argmax(y, axis=1) + counts = np.bincount(flat_y) + self.counts_ = counts + logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}") + self.n_features_ = X.shape[1] if len(X.shape) > 1 else 1 + self.classes_ = range(len(unique_labels(y))) + + if self.metric in compressors.keys(): + compressor = compressors[self.metric] + Cx_ = Parallel(n_jobs=n_jobs)(delayed(compressor)(x) for x in self.X_) + self.Cx_ = np.array(Cx_) if not isinstance(Cx_, np.ndarray) else Cx_ + else: + self.Cx_ = None + self.X_ = self.X_.astype(str) + if self.m > 0: + assert isinstance( + self.m, + (int, float), + ), f"Expected {self.m} to be an integer" + assert isinstance( + self.sampling_method, + (str, type(None)), + ), f"Expected {self.sampling_method} to be a string or None" + indices = self._find_best_samples(self.sampling_method) + self._set_best_indices(indices) + elif self.m == -1: + distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) + self.distance_matrix = distance_matrix + elif self.m is None or self.m == 0: + pass + else: + raise ValueError( + f"Expected {self.m} to be -1, 0, a positive integer or a float between 0 and 1. 
Got type {type(self.m)}", + ) + if self.precompute is True: + self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) + self.clf_ = self.clf_.fit(self.distance_matrix, self.y_) + else: + raise NotImplementedError( + f"Precompute {self.precompute} not supported for type(self.clf_) {type(self.clf_)}", + ) + return self + + def _set_best_indices(self, indices): + self.X_ = self.X_[indices] + self.y_ = self.y_[indices] + if self.Cx_ is not None: + self.Cx_ = self.Cx_[indices] + # This is a hack that allows us to deal with n-dimensional arrays using the normal matrix[:, indices][indices, :] breaks if n>2 + distance_matrix = self.distance_matrix[ + indices + ].T # select the rows at the indices and transpose the matrix + distance_matrix = distance_matrix[ + indices + ] # select the transposed columns at the indices + self.distance_matrix = distance_matrix.T # transpose the matrix again + logger.info( + f"Selected {len(self.X_)} samples using method {self.sampling_method}.", + ) + counts = np.bincount(np.argmax(self.y_, axis=1)) + logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}") + assert len(self.X_) == len( + self.y_, + ), f"Expected {len(self.X_)} == {len(self.y_)}" + assert distance_matrix.shape == ( + len(self.X_), + len(self.X_), + ), f"Expected {distance_matrix.shape} == ({len(self.X_)}, {len(self.X_)})" + + def predict(self, X: np.ndarray): + """Predict the class labels for the provided data. 
+ + Args: + X (np.ndarray): The input data + + Returns: + np.ndarray: The predicted class labels + """ + check_is_fitted(self) + logger.info(f"Predicting with X of shape {X.shape}") + if self.metric in compressors.keys(): + compressor = compressors[self.metric] + Cx2 = Parallel(n_jobs=-1)( + delayed(compressor)(x) + for x in tqdm( + X, + desc="Compressing samples", + leave=False, + dynamic_ncols=True, + ) + ) + assert len(Cx2) == len(X), f"Expected {len(Cx2)} == {len(X)}" + assert len(self.X_) == len( + self.Cx_, + ), f"Expected {len(self.X_)} == {len(self.Cx_)}" + distance_matrix = self._calculate_rectangular_distance_matrix( + x1=X, + Cx1=Cx2, + x2=self.X_, + Cx2=self.Cx_, + n_jobs=-1, + ) + else: + distance_matrix = self._calculate_rectangular_distance_matrix( + x2=self.X_, + x1=X, + n_jobs=-1, + ) + assert distance_matrix.shape == ( + len(X), + len(self.X_), + ), f"Expected {distance_matrix.shape} == ({len(X)}, {len(self.X_)})" + y_pred = self.clf_.predict(distance_matrix) + + if len(np.squeeze(y_pred).shape) == 1: + encoder = LabelBinarizer() + y_pred = encoder.fit(self.y_).transform(y_pred) + else: + encoder = LabelEncoder() + y_pred = encoder.fit(self.y_).transform(y_pred) + return y_pred + + def score(self, X: np.ndarray, y: np.ndarray): + """Score the model using the provided data. 
+ + Args: + X (np.ndarray): The input data + y (np.ndarray): The target labels + + Returns: + float: The accuracy of the model + """ + y_pred = self.predict(X) + return accuracy_score(y, y_pred) + + +class BatchedGzipClassifier(GzipClassifier, BatchedMixin): + pass + + +class GzipKNN(GzipClassifier): + def __init__( + self, + k: int = 2, + m=0, + sampling_method="random", + distance_matrix=None, + metric="gzip", + symmetric=False, + precompute=True, + **kwargs, + ): + super().__init__( + sampling_method=sampling_method, + m=m, + distance_matrix=distance_matrix, + metric=metric, + symmetric=symmetric, + precompute=precompute, + **kwargs, + ) + self.clf_ = KNeighborsClassifier(n_neighbors=k, metric="precomputed", **kwargs) + self.k = k + + def predict(self, X: np.ndarray, n_jobs=-1): + """Predict the class labels for the provided data. + + Args: + X (np.ndarray): The input data + + Returns: + np.ndarray: The predicted class labels + """ + check_is_fitted(self) + + logger.info(f"Predicting with X of shape {X.shape}") + # Pre-compress samples not working + if self.metric in compressors.keys(): + compressor = compressors[self.metric] + Cx2 = Parallel(n_jobs=n_jobs)( + delayed(compressor)(x) + for x in tqdm( + X, + desc="Compressing samples", + leave=False, + dynamic_ncols=True, + ) + ) + assert len(Cx2) == len(X), f"Expected {len(Cx2)} == {len(X)}" + assert len(self.X_) == len( + self.Cx_, + ), f"Expected {len(self.X_)} == {len(self.Cx_)}" + distance_matrix = self._calculate_rectangular_distance_matrix( + x1=X, + Cx1=Cx2, + x2=self.X_, + Cx2=self.Cx_, + n_jobs=n_jobs, + ) + else: + distance_matrix = self._calculate_rectangular_distance_matrix( + X, + self.X_, + n_jobs=n_jobs, + ) + assert distance_matrix.shape == ( + len(X), + len(self.X_), + ), f"Expected {distance_matrix.shape} == ({len(X)}, {len(self.X_)})" + y_pred = [] + if self.precompute is True: + y_pred = self.clf_.predict(distance_matrix) + else: + for i in tqdm( + range(len(X)), + desc="Predicting", + 
leave=False, + total=len(X), + dynamic_ncols=True, + ): + # Sort the distances and get the nearest k samples + sorted_idx = np.argsort(distance_matrix[i]) + # Get the first k samples + nearest_k = sorted_idx[: self.k] + # Get the labels of the nearest samples + nearest_labels = list(self.y_[nearest_k]) + # predict class + unique, counts = np.unique(nearest_labels, return_counts=True) + # Get the most frequent label + y_pred.append(unique[np.argmax(counts)]) + return y_pred + + +class BatchedGzipKNN(GzipKNN, BatchedMixin): + pass + + +class GzipLogisticRegressor(GzipClassifier): + def __init__( + self, + m=0, + sampling_method="random", + distance_matrix=None, + metric="gzip", + symmetric=False, + precompute=True, + **kwargs, + ): + self.precompute = precompute + clf = LogisticRegression(**kwargs) + super().__init__( + clf_=clf, + precompute=precompute, + sampling_method=sampling_method, + m=m, + distance_matrix=distance_matrix, + metric=metric, + symmetric=symmetric, + **kwargs, + ) + + +class BatchedGzipLogisticRegressor(GzipLogisticRegressor, BatchedMixin): + pass + + +class GzipSVC(GzipClassifier): + def __init__( + self, + kernel="rbf", + m=0, + sampling_method="random", + distance_matrix=None, + metric="gzip", + symmetric=False, + precompute=True, + **kwargs, + ): + self.precompute = precompute + clf = SVC(kernel=kernel, **kwargs) + super().__init__( + clf_=clf, + precompute=precompute, + sampling_method=sampling_method, + m=m, + distance_matrix=distance_matrix, + metric=metric, + symmetric=symmetric, + **kwargs, + ) + self.kernel = kernel + + +class BatchedGzipSVC(GzipSVC, BatchedMixin): + pass + + +supported_models = { + "knn": GzipKNN, + "logistic": GzipLogisticRegressor, + "svc": GzipSVC, +} + +batched_models = { + "knn": BatchedGzipKNN, + "logistic": BatchedGzipLogisticRegressor, + "svc": BatchedGzipSVC, +} + +model_scorers = { + "knn": "accuracy", + "logistic": "accuracy", + "svc": "accuracy", +} + +scorers = { + "accuracy": accuracy_score, +} + + +def 
test_model( + X_train, + X_test, + y_train, + y_test, + model_type, + optimizer=None, + batched=False, + **kwargs, +) -> dict: + """ + Args: + X (np.ndarray): The input data + y (np.ndarray): The target labels + train_size (int): The number of samples to use for training. Default is 100. + test_size (int): The number of samples to use for testing. Default is 100. + **kwargs: Additional keyword arguments to pass to the GzipClassifier + Returns: + dict: A dictionary containing the accuracy, train_time, and pred_time + """ + if batched is True: + model = batched_models[model_type](**kwargs) + else: + model = supported_models[model_type](**kwargs) + alias = model_scorers[model_type] + scorer = scorers[alias] + start = time.time() + model.fit(X_train, y_train) + check_is_fitted(model) + end = time.time() + train_time = end - start + start = time.time() + predictions = model.predict(X_test) + end = time.time() + pred_time = end - start + score = round(scorer(y_test, predictions), 3) + print(f"Training time: {train_time}") + print(f"Prediction time: {pred_time}") + print(f"{alias} is: {score}") + score_dict = { + f"{alias.lower()}": score, + "train_time": train_time, + "pred_time": pred_time, + } + if optimizer is not None: + score = score_dict[optimizer] + return score + else: + return score_dict + + +def load_data(dataset, precompressed): + if dataset == "20newsgroups": + X, y = fetch_20newsgroups( + subset="train", + categories=["alt.atheism", "talk.religion.misc"], + shuffle=True, + random_state=42, + return_X_y=True, + ) + y = ( + LabelEncoder().fit(y).transform(y) + ) # Turns the labels "alt.atheism" and "talk.religion.misc" into 0 and 1 + elif dataset == "kdd_nsl": + df = pd.read_csv("raw_data/kdd_nsl.csv") + y = df["label"] + X = df.drop("label", axis=1) + elif dataset == "kdd_nsl": + df = pd.read_csv("raw_data/kdd_nsl.csv") + y = df["label"] + X = df.drop("label", axis=1) + X = np.array(X) + elif dataset == "make_classification": + X, y = make_classification( + 
n_samples=1000, + n_features=20, + n_classes=2, + random_state=42, + ) + y = LabelEncoder().fit(y).transform(y) + elif dataset == "truthseeker": + df = pd.read_csv("raw_data/truthseeker.csv") + y = df["BotScoreBinary"] + X = df.drop("BotScoreBinary", axis=1) + elif dataset == "sms-spam": + df = pd.read_csv("raw_data/sms-spam_undersampled_1450.csv") + y = df["label"] + X = df.drop("label", axis=1) + elif dataset == "ddos": + df = pd.read_csv("raw_data/ddos.csv") + y = df["Label"] + X = df.drop("Label", axis=1) + else: + raise ValueError( + f"Dataset {dataset} not found. Options are: 20newsgroups, kdd_nsl, make_classification, truthseeker, sms-spam, ddos.", + ) + if precompressed is True: + X = pd.DataFrame(X).applymap(lambda x: len(gzip.compress(str(x).encode()))) + X = np.array(X) + return X, y + + +def prepare_data( + dataset="truthseeker", + precompressed=False, + train_size=100, + test_size=100, + random_state=42, +): + X, y = load_data(dataset, precompressed=precompressed) + X = np.array(X) + y = np.array(y) + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + train_size=train_size, + test_size=test_size, + random_state=random_state, + stratify=y, + ) + return X_train, X_test, y_train, y_test + + +def main(args: argparse.Namespace): + """ + This is the main function that runs the GzipClassifier with the provided arguments. + It will fetch the dataset, split it into training and testing sets. + Then, it will train the model using the fit method and test it using the predict method. 
+ Args: + args (argparse.Namespace): The command line arguments + Usage: + python gzip_classifier.py --compressor gzip --k 3 --m 100 --method random --distance_matrix distance_matrix --dataset kdd_nsl + """ + + X, y = load_data(dataset=args.dataset, precompressed=args.precompressed) + params = vars(args) + dataset = params.pop("dataset") + precompressed = params.pop("precompressed") + train_size = params.pop("train_size") + test_size = params.pop("test_size") + random_state = params.pop("random_state") + X_train, X_test, y_train, y_test = prepare_data( + dataset=dataset, + precompressed=precompressed, + train_size=train_size, + test_size=test_size, + random_state=random_state, + ) + kwarg_args = params.pop("kwargs") + # conver list of key-value pairs to dictionary + kwarg_args = dict([arg.split("=") for arg in kwarg_args]) + params.update(**kwarg_args) + params["precompute"] = True + X = np.array(X) if not isinstance(X, np.ndarray) else X + y = np.array(y) if not isinstance(y, np.ndarray) else y + test_model(X_train, X_test, y_train, y_test, **params) + + +parser = argparse.ArgumentParser() +parser.add_argument("--model_type", type=str, default="knn") +parser.add_argument("--precompute", action="store_true") +parser.add_argument("--symmetric", action="store_true") +parser.add_argument("--metric", type=str, default="gzip", choices=all_metrics) +parser.add_argument("--m", type=int, default=-1) +parser.add_argument("--sampling_method", type=str, default="random") +parser.add_argument("--distance_matrix", type=str, default=None) +parser.add_argument("--dataset", type=str, default="kdd_nsl") +parser.add_argument("--train_size", type=int, default=100) +parser.add_argument("--test_size", type=int, default=100) +parser.add_argument("--optimizer", type=str, default="accuracy") +parser.add_argument("--precompressed", action="store_true") +parser.add_argument("--random_state", type=int, default=42) +parser.add_argument("kwargs", nargs=argparse.REMAINDER) + +if __name__ == 
"__main__": + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + main(args) diff --git a/examples/gzip/merge.py b/examples/gzip/merge.py new file mode 100644 index 00000000..afbd4d09 --- /dev/null +++ b/examples/gzip/merge.py @@ -0,0 +1,161 @@ +import pandas as pd +from pathlib import Path +import logging +import yaml +import argparse +from deckard.layers.compile import save_results + +logger = logging.getLogger(__name__) + + +__all__ = ["merge_csv", "merge_main", "merge_parser"] + + +def merge_csv( + big_dir, + little_dir, + output_file="merged", + data_file="raw.csv", + little_dir_data_file=None, + fillna={}, + how="outer", + **kwargs, +): + """ + The function `merge_csv` merges two CSV files, one from a big directory and one from a little + directory, and saves the merged file. + + Args: + big_dir: The `big_dir` parameter is the directory path where the dataset to be merged into is located. This + dataset is assumed to have a file named "raw.csv" which will be read. + little_dir: The `little_dir` parameter is the directory path where the smaller dataset is located. + data_file: The `data_file` parameter is the name of the CSV file that will be used for both the + `big` and `small` dataframes. If `little_dir_data_file` is not provided, then the `data_file` from + the `big` directory will be used for both dataframes. Defaults to raw.csv + little_dir_data_file: The parameter `little_dir_data_file` is an optional argument that specifies + the name of the data file in the `little_dir` directory. If this argument is provided, the function + will read the data from the specified file in the `little_dir` directory. If this argument is not + provided, the + + Returns: + None. 
+ """ + if Path(Path(big_dir) / data_file).exists() is False: + big = pd.DataFrame() + else: + big = pd.read_csv(Path(big_dir) / data_file, index_col=0) + assert "name" in big + if little_dir is None: + little_dir = big_dir + if little_dir_data_file is not None: + small = pd.read_csv(Path(little_dir) / little_dir_data_file, index_col=0) + assert "name" in small + else: + small = pd.read_csv(Path(little_dir) / data_file) + logger.info(f"Shape of big: {big.shape}") + logger.info(f"Shape of small: {small.shape}") + merged = pd.merge(big, small, how=how, **kwargs) + for k, v in fillna.items(): + if k in merged.columns: + merged[k] = merged[k].fillna(v) + else: + merged[k] = v + logger.info(f"Shape of merged: {merged.shape}") + logger.info(f"Saving merged to {data_file}.") + results_folder = Path(output_file).parent + results_file = Path(output_file).name + results_folder.mkdir(parents=True, exist_ok=True) + merged["id"] = merged["name"] + saved_path = save_results( + merged, + results_file=results_file, + results_folder=results_folder, + ) + assert Path(saved_path).exists(), f"Saved path {saved_path} does not exist." 
+ return None + + +def merge_main(args): + if args.config is not None: + with open(args.config, "r") as stream: + fillna = yaml.safe_load(stream).get("fillna", {}) + else: + fillna = {} + if args.output_folder is None: + args.output_folder = Path().cwd() + output_file = Path(args.output_folder) / args.output_file + if isinstance(args.little_dir_data_file, list): + for little in args.little_dir_data_file: + merge_csv( + args.big_dir, + args.little_dir, + data_file=args.data_file, + little_dir_data_file=little, + fillna=fillna, + output_file=output_file, + ) + args.big_dir = Path(args.output_folder) + args.data_file = Path(args.output_file).name + print(f"Big dir: {args.big_dir}") + print(f"Data file: {args.data_file}") + print(f"Output file: {args.output_file}") + else: + merge_csv( + args.big_dir, + args.little_dir, + data_file=args.data_file, + little_dir_data_file=args.little_dir_data_file, + fillna=fillna, + output_file=output_file, + how="outer", + ) + + +merge_parser = argparse.ArgumentParser() +merge_parser.add_argument( + "--big_dir", + type=str, + help="Directory of the big run", + required=True, +) +merge_parser.add_argument( + "--little_dir", + type=str, + help="Directory of the small run", + required=False, +) +merge_parser.add_argument( + "--data_file", + type=str, + help="Name of the data file", + required=True, +) +merge_parser.add_argument( + "--output_file", + type=str, + help="Name of the output file", + default="merged.csv", +) +merge_parser.add_argument( + "--output_folder", + type=str, + help="Name of the output folder", + required=False, +) +merge_parser.add_argument( + "--little_dir_data_file", + type=str, + help="Name(s) of the files to merge into the big file.", + required=False, + nargs="*", +) +merge_parser.add_argument( + "--config", + type=str, + help="Name of file containing a 'fillna' config dictionary.", + required=False, +) + +if __name__ == "__main__": + args = merge_parser.parse_args() + merge_main(args) diff --git 
a/examples/gzip/privacy_metric.py b/examples/gzip/privacy_metric.py new file mode 100644 index 00000000..e6856491 --- /dev/null +++ b/examples/gzip/privacy_metric.py @@ -0,0 +1,101 @@ +from gzip_classifier import GzipSVC +import art +import numpy as np +from gzip_classifier import prepare_data + + +# Create a GzipKNN classifier +def calculate_privacy_risk(X_train, X_test, y_train, y_test, metric): + clf = GzipSVC(metric=metric) + clf.fit(X_train, y_train) + est = art.estimators.classification.SklearnClassifier(model=clf, preprocessing=None) + privacy_risk = art.metrics.SHAPr( + target_estimator=est, + x_train=X_train, + x_test=X_test, + y_train=y_train, + y_test=y_test, + ) + accuracies = clf.score(X_test, y_test) + return privacy_risk, accuracies + + +def line_search_metrics(X_train, X_test, y_train, y_test, metrics): + metric_dict = {} + acc_list = [] + shapr_list = [] + for metric in metrics: + scores, privacy_risks = calculate_privacy_risk( + X_train, + X_test, + y_train, + y_test, + metric, + ) + metric_dict[metric] = {} + nb_classes = len(np.unique(y_test)) + shaprs = [] + accuracies = [] + for i in range(nb_classes): + idxs = np.where(y_test == i) + privacy_risk = np.mean(privacy_risks[idxs]) + privacy_risk = 0.01 if privacy_risk == 0 else privacy_risk + shaprs.append(privacy_risk) + accuracy = round(np.mean(scores[idxs]), 2) + accuracies.append(accuracy) + print( + f"{metric.capitalize()} SHAPr for class {i}: {privacy_risk}; Accuracy: {accuracy}", + ) + assert len(shaprs) == nb_classes + assert len(accuracies) == nb_classes + acc_list.append(accuracies) + shapr_list.append(shaprs) + print(f"{metric.capitalize()} SHAPr: {shaprs}; Accuracy: {accuracies}") + import plotext as plt + + label_prefixes = ["Accuracy", "SHAPr"] + labels = [ + f"{label_prefixes[i]}_{j}" + for i in range(nb_classes) + for j in range(len(label_prefixes)) + ] + values = [] + # Turn acc_list into separate lists for each class + for i in range(nb_classes): + sub_values = [acc_list[j][i] 
for j in range(len(acc_list))] + values.append(sub_values) + for i in range(nb_classes): + sub_values = [shapr_list[j][i] for j in range(len(shapr_list))] + values.append(sub_values) + plt.simple_multiple_bar( + metrics, + values, + title="Accuracy and SHAPr for different metrics", + labels=labels, + ) + plt.show() + return metric_dict + + +if __name__ == "__main__": + metrics = [ + "gzip", + "lzma", + "bz2", + "zstd", + "pkl", + "levenshtein", + "ratio", + "hamming", + "jaro", + "jaro_winkler", + "seqratio", + ] + datasets = ["kdd_nsl", "truthseeker", "sms-spam", "ddos"] + + for dataset in datasets: + X_train, X_test, y_train, y_test = prepare_data(dataset=dataset) + line_search_metrics(X_train, X_test, y_train, y_test, metrics) + + +# Decorator to make to turn a fit function into the batch'd version diff --git a/examples/pytorch/.dvc/.gitignore b/examples/pytorch/.dvc/.gitignore new file mode 100644 index 00000000..528f30c7 --- /dev/null +++ b/examples/pytorch/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/examples/pytorch/.dvc/tmp/lock b/examples/pytorch/.dvc/tmp/lock index b7b7a089..d6eb4e1b 100644 --- a/examples/pytorch/.dvc/tmp/lock +++ b/examples/pytorch/.dvc/tmp/lock @@ -1 +1 @@ - 21824 + 1966819 diff --git a/examples/pytorch/.dvc/tmp/rwlock b/examples/pytorch/.dvc/tmp/rwlock index 9e26dfee..0967ef42 100644 --- a/examples/pytorch/.dvc/tmp/rwlock +++ b/examples/pytorch/.dvc/tmp/rwlock @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/examples/pytorch/cifar10/.dvc/config b/examples/pytorch/cifar10/.dvc/config index e69de29b..4cf322d9 100644 --- a/examples/pytorch/cifar10/.dvc/config +++ b/examples/pytorch/cifar10/.dvc/config @@ -0,0 +1,2 @@ +[core] + autostage = true diff --git a/examples/pytorch/cifar10/conf/afr.yaml b/examples/pytorch/cifar10/conf/afr.yaml index ae09b33b..7413432c 100644 --- a/examples/pytorch/cifar10/conf/afr.yaml +++ b/examples/pytorch/cifar10/conf/afr.yaml @@ -1,19 +1,20 @@ covariates: + - 
"adv_fit_time" - "accuracy" - "train_time" - - "predict_time" - "atk_value" - "def_value" - "data.sample.random_state" - - "adv_failure_rate" - "model_layers" - - "adv_fit_time" - model.trainer.nb_epoch +# - atk_gen +# - def_gen + - predict_time fillna: model.trainer.nb_epoch: 20 weibull: plot: - file : weibull_aft.pdf + file : weibull_aft.eps title : Weibull AFR Model labels: "Intercept: rho_": "$\\rho$" @@ -25,13 +26,16 @@ weibull: "adv_accuracy: lambda_": "Adv. Accuracy" "accuracy: lambda_": "Ben. Accuracy" "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" + "adv_failure_rate: lambda_": "$f_{adv.}(t;\\theta)$" + "failure_rate: lambda_": "$f_{ben.}(t;\\theta)$" "model.trainer.nb_epoch: lambda_": "No. of Epochs" "model.trainer.batch_size: lambda_": "Batch Size" "def_gen": "Defence" + "model_layers: lambda_" : "Layers" + "def_value: lambda_" : "Defence Strength" + "predict_time: lambda_" : "$t_{predict}$" partial_effect: - - "file": "weibull_epochs_partial_effect.pdf" + - "file": "weibull_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Weibull AFR" @@ -41,19 +45,19 @@ weibull: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "weibull_layers_partial_effect.pdf" + - "file": "weibull_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Weibull AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } # cox: # plot: -# file : cox_aft.pdf +# file : cox_aft.eps # title : Cox AFR Model # labels: # "Intercept: rho_": "$\\rho$" @@ -65,13 +69,13 @@ weibull: # "adv_accuracy: lambda_": "Adv. Accuracy" # "accuracy: lambda_": "Ben. 
Accuracy" # "adv_fit_time: lambda_": "$t_{attack}$" -# "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" -# "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" +# "adv_failure_rate: lambda_": "$f_{adv.}(t;\\theta)$" +# "failure_rate: lambda_": "$f_{ben.}(t;\\theta)$" # "model.trainer.nb_epoch: lambda_": "No. of Epochs" # "model.trainer.batch_size: lambda_": "Batch Size" # "def_gen": "Defence" # partial_effect: -# - "file": "cox_epochs_partial_effect.pdf" +# - "file": "cox_epochs_partial_effect.eps" # "covariate_array": "model.trainer.nb_epoch" # "values_array": [1,10,25,50] # "title": "$S(t)$ for Cox AFR" @@ -81,37 +85,40 @@ weibull: # "title": "Epochs", # "labels": ["1", "10", "25", "50"] # } -# - "file": "cox_layers_partial_effect.pdf" +# - "file": "cox_layers_partial_effect.eps" # "covariate_array": "model_layers" # "values_array": [18, 34, 50, 101, 152] # "title": "$S(t)$ for Cox AFR" # "ylabel": "Expectation of $S(t)$" # "xlabel": "Time $T$ (seconds)" # "legend_kwargs": { -# "title": "ResNet Layers", +# "title": "Layers", # "labels": ["18", "34", "50", "101", "152"] # } log_logistic: plot: - file : log_logistic_aft.pdf + file : log_logistic_aft.eps title : Log logistic AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: beta_": "$\\beta$" + "Intercept: alpha_": "$\\alpha$" + "data.sample.random_state: alpha_": "Random State" + "atk_value: alpha_": "Attack Strength" + "train_time: alpha_": "$t_{train}$" + "predict_proba_time: alpha_": "$t_{predict}$" + "adv_accuracy: alpha_": "Adv. Accuracy" + "accuracy: alpha_": "Ben. Accuracy" + "adv_fit_time: alpha_": "$t_{attack}$" + "adv_failure_rate: alpha_": "$f_{adv.}(t;\\theta)$" + "failure_rate: alpha_": "$f_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: alpha_": "No. of Epochs" + "model.trainer.batch_size: alpha_": "Batch Size" "def_gen": "Defence" + "model_layers: alpha_" : "Layers" + "def_value: alpha_" : "Defence Strength" + "predict_time: alpha_" : "$t_{predict}$" partial_effect: - - "file": "log_logistic_epochs_partial_effect.pdf" + - "file": "log_logistic_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Logistic AFR" @@ -121,37 +128,40 @@ log_logistic: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "log_logistic_layers_partial_effect.pdf" + - "file": "log_logistic_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Log Logistic AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } log_normal: plot: - file : log_normal_aft.pdf + file : log_normal_aft.eps title : Log Normal AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. 
Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: sigma_": "$\\rho$" + "Intercept: mu_": "$\\mu$" + "data.sample.random_state: mu_": "Random State" + "atk_value: mu_": "Attack Strength" + "train_time: mu_": "$t_{train}$" + "predict_proba_time: mu_": "$t_{predict}$" + "adv_accuracy: mu_": "Adv. Accuracy" + "accuracy: mu_": "Ben. Accuracy" + "adv_fit_time: mu_": "$t_{attack}$" + "adv_failure_rate: mu_": "$f_{adv.}(t;\\theta)$" + "failure_rate: mu_": "$f_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: mu_": "No. of Epochs" + "model.trainer.batch_size: mu_": "Batch Size" "def_gen": "Defence" + "model_layers: mu_" : "Layers" + "def_value: mu_" : "Defence Strength" + "predict_time: mu_" : "$t_{predict}$" partial_effect: - - "file": "log_normal_epochs_partial_effect.pdf" + - "file": "log_normal_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Normal AFR" @@ -161,13 +171,13 @@ log_normal: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "log_normal_layers_partial_effect.pdf" + - "file": "log_normal_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Log Normal AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } diff --git a/examples/pytorch/cifar10/conf/cifar10.yaml b/examples/pytorch/cifar10/conf/cifar10.yaml index 474fdfc6..5f701a6a 100644 --- a/examples/pytorch/cifar10/conf/cifar10.yaml +++ b/examples/pytorch/cifar10/conf/cifar10.yaml @@ -29,7 +29,6 @@ hydra: params: ++data.sample.random_state: choice(0, 1, 2, 3, 4, 5, 6, 7, 
8, 9) ++model.art.initialize.optimizer.lr: choice(10, 1, 0.1, 0.01, 0.001, .0001, .00001, 0.000001) - ++model.trainer.nb_epoch: choice(1, 10, 30, 50, 100) _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher diff --git a/examples/pytorch/cifar10/conf/clean.yaml b/examples/pytorch/cifar10/conf/clean.yaml index 7843aba8..e9c215ab 100644 --- a/examples/pytorch/cifar10/conf/clean.yaml +++ b/examples/pytorch/cifar10/conf/clean.yaml @@ -11,7 +11,7 @@ defences: GaussianAugmentation: Gauss-in GaussianNoise: Gauss-out HighConfidence: Conf - nb_epoch: Epochs + nb_epoch : Epochs model_layers: Control params: Deep: attack.init.kwargs.nb_grads @@ -26,5 +26,9 @@ params: Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma Control: model_layers Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + model.trainer.nb_epoch: 20 fillna: - Epochs: 20 + model.trainer.nb_epoch : 20 diff --git a/examples/pytorch/cifar10/conf/compile.yaml b/examples/pytorch/cifar10/conf/compile.yaml deleted file mode 100644 index 1314aceb..00000000 --- a/examples/pytorch/cifar10/conf/compile.yaml +++ /dev/null @@ -1,33 +0,0 @@ -attacks: - # CarliniL0Method: CW_0 - # CarliniL2Method: CW_2 - # CarliniLInfMethod: CW_inf - DeepFool: Deep - FastGradientMethod: FGM - HopSkipJump: HSJ - PixelAttack: Pixel - ProjectedGradientDescent: PGD - ThresholdAttack: Thresh -defences: - Control: Control - FeatureSqueezing: FSQ - GaussianAugmentation: Gauss-in - GaussianNoise: Gauss-out - HighConfidence: Conf - Epochs: Epochs -params: - # art.attacks.evasion.CarliniL0Method: attack.init.confidence - # art.attacks.evasion.CarliniL2Method: attack.init.confidence - # art.attacks.evasion.CarliniLInfMethod: attack.init.confidence - Deep: attack.init.nb_grads - FGM: attack.init.eps - HSJ: attack.init.max_iter - Pixel: attack.init.th - PGD: attack.init.eps - Thresh: attack.init.th - Gauss-out: 
model.art.pipeline.postprocessor.scale - Conf: model.art.pipeline.postprocessor.cutoff - FSQ: model.art.pipeline.preprocessor.bit_depth - Gauss-in: model.art.pipeline.preprocessor.sigma - Control: model_layers - Epochs: model.trainer.nb_epoch diff --git a/examples/pytorch/cifar10/conf/plots.yaml b/examples/pytorch/cifar10/conf/plots.yaml index 464f53b3..57c016f7 100644 --- a/examples/pytorch/cifar10/conf/plots.yaml +++ b/examples/pytorch/cifar10/conf/plots.yaml @@ -1,5 +1,5 @@ cat_plot: -- file: adv_accuracy_vs_defence_type.pdf +- file: adv_accuracy_vs_defence_type.eps hue: model_name kind: boxen set: @@ -16,7 +16,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_accuracy_vs_defence_type.pdf +- file: ben_accuracy_vs_defence_type.eps hue: model_name kind: boxen titles: Ben. Accuracy vs Defence Type @@ -31,7 +31,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failures_per_train_time_vs_defence_type.pdf +- file: ben_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -48,11 +48,11 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_defence_type.pdf +- file: adv_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen - set: - yscale: log + # set: + # yscale: log titles: $\bar{C}_{adv.}$ vs Defence Type x: def_gen xlabels: Defence Type @@ -65,12 +65,12 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_attack_type.pdf +- file: adv_failures_per_train_time_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name - set: - yscale: log + # set: + # yscale: log titles: $\bar{C}_{adv.}$ vs Attack Type x: atk_gen xlabels: Attack Type @@ -83,15 +83,15 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_test_time_vs_defence_type.pdf +- file: adv_failures_per_test_time_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name - titles: $h_{adv}$ vs Defence Type + titles: $f_{adv}$ vs 
Defence Type x: def_gen xlabels: Defence Type y: adv_failure_rate - ylabels: $h_{adv.}$ + ylabels: $f_{adv.}$ rotation : 90 hue_order: - ResNet18 @@ -99,23 +99,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_accuracy_vs_defence_type.pdf - hue: model_name - kind: boxen - legend_title: Model Name - titles: Adv. Accuracy vs Defence Type - x: def_gen - xlabels: Defence Type - y: adv_accuracy - ylabels: Adv. Ben. Accuracy - rotation : 90 - hue_order: - - ResNet18 - - ResNet34 - - ResNet50 - - ResNet101 - - ResNet152 -- file: adv_accuracy_vs_attack_type.pdf +- file: adv_accuracy_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -131,17 +115,17 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failure_rate_vs_defence_type.pdf +- file: ben_failure_rate_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name set: yscale: log - titles: $h_{ben}(t; \theta)$ vs Defence Type + titles: $f_{ben}(t; \theta)$ vs Defence Type x: def_gen xlabels: Defence Type y: failure_rate - ylabels: $h_{ben}(t; \theta)$ + ylabels: $f_{ben}(t; \theta)$ rotation : 90 hue_order: - ResNet18 @@ -150,7 +134,7 @@ cat_plot: - ResNet101 - ResNet152 line_plot: -- file: def_param_vs_accuracy.pdf +- file: def_param_vs_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Ben. Accuracy vs Defence Strength @@ -160,15 +144,17 @@ line_plot: y: accuracy y_scale: ylabel: Ben. Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_accuracy.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Adv. Accuracy vs Defence Strength @@ -178,33 +164,37 @@ line_plot: y: adv_accuracy y_scale: ylabel: Adv. 
Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_failure_rate.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_failure_rate.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} - title: $h_{adv}$ vs Defence Strength + title: $f_{adv}$ vs Defence Strength x: def_value - x_scale: linear + x_scale: log xlabel: Defence Control Parameter y: adv_failure_rate y_scale: log - ylabel: $h_{adv.}$ - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: atk_param_vs_accuracy.pdf + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: atk_param_vs_accuracy.eps hue: atk_gen legend: {bbox_to_anchor: [1.05, 1]} title: Adv. Accuracy vs Attack Strength @@ -221,16 +211,17 @@ line_plot: - HSJ - Pixel - Thresh - + errorbar: se + err_style: bars scatter_plot: - x: train_time_per_sample y: adv_failure_rate hue: model_name xlabel: $t_{train}$ - ylabel: $h_{adv}$ - title: $h_{adv}$ vs $t_{train}$ - file: adv_failure_rate_vs_train_time.pdf - y_scale: log + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps + y_scale: linear x_scale: log legend: title: Model Name diff --git a/examples/pytorch/cifar10/dvc.lock b/examples/pytorch/cifar10/dvc.lock index f353aeed..e371d08c 100644 --- a/examples/pytorch/cifar10/dvc.lock +++ b/examples/pytorch/cifar10/dvc.lock @@ -1,501 +1,296 @@ schema: '2.0' stages: - train: - cmd: python -m deckard.layers.experiment train --config_file cifar10.yaml + clean@attack: + cmd: python -m deckard.layers.clean_data -i cifar/reports/attack.csv -o cifar/reports/clean_attack.csv + -c conf/clean.yaml + deps: + - path: cifar/reports/attack.csv + md5: 
f782978aea22d56dbd68fa9f04e4dfcf + size: 26091607 params: params.yaml: - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - files: - _target_: deckard.base.files.FileConfig - adv_predictions_file: adv_predictions.json - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: cifar - model_dir: models - model_file: model - model_type: .pt - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0.0 - - 255.0 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 10 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 100 - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + files.directory: cifar + 
files.reports: reports + conf/clean.yaml: + attacks: + DeepFool: Deep + FastGradientMethod: FGM + HopSkipJump: HSJ + PixelAttack: Pixel + ProjectedGradientDescent: PGD + ThresholdAttack: Thresh + defences: + Control: Control + FeatureSqueezing: FSQ + GaussianAugmentation: Gauss-in + GaussianNoise: Gauss-out + HighConfidence: Conf + nb_epoch: Epochs + model_layers: Control + fillna: + model.trainer.nb_epoch: 20 + params: + Deep: attack.init.kwargs.nb_grads + FGM: attack.init.kwargs.eps + HSJ: attack.init.kwargs.max_iter + Pixel: attack.init.kwargs.th + PGD: attack.init.kwargs.eps + Thresh: attack.init.kwargs.th + Gauss-out: model.art.pipeline.postprocessor.kwargs.scale + Conf: model.art.pipeline.postprocessor.kwargs.cutoff + FSQ: model.art.pipeline.preprocessor.kwargs.bit_depth + Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma + Control: model_layers + Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + model.trainer.nb_epoch: 20 outs: - - path: cifar/data/data.pkl - md5: 6503fed5d4e6cc1163898c0ab6a863dd - size: 739680311 - - path: cifar/models/model.optimizer.pt - hash: md5 - md5: d46598fb7feec074c02bd0ed081184da - size: 44805933 - - path: cifar/models/model.pt - hash: md5 - md5: f5d11f93160ad27b8468efc0d71eb695 - size: 44811029 - - path: cifar/reports/train/default/predictions.json - hash: md5 - md5: 37f581850d9f6d491cb0d9025e620bf9 - size: 2439094 - - path: cifar/reports/train/default/score_dict.json - hash: md5 - md5: 055f95d856bc09b533eccb57314db0c4 - size: 397 - attack: - cmd: python -m deckard.layers.experiment attack --config_file cifar10.yaml + - path: cifar/reports/clean_attack.csv + md5: 81b63f1d5864d65bdaf71aed4ae4c2b0 + size: 14727230 + afr: + cmd: python -m deckard.layers.afr --dataset cifar --data_file cifar/reports/clean_attack.csv --target + adv_accuracy --duration_col predict_time --dataset cifar --config_file conf/afr.yaml + --plots_folder cifar/plots/ deps: - - path: cifar/data/data.pkl - hash: md5 - md5: 
6503fed5d4e6cc1163898c0ab6a863dd - size: 739680311 - - path: cifar/models/model.pt - hash: md5 - md5: f5d11f93160ad27b8468efc0d71eb695 - size: 44811029 + - path: cifar/reports/clean_attack.csv + md5: 81b63f1d5864d65bdaf71aed4ae4c2b0 + size: 14727230 params: params.yaml: - attack: - _target_: deckard.base.attack.Attack - attack_size: 10 - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.attack.AttackInitializer - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0.0 - - 255.0 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 10 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 100 - name: art.attacks.evasion.HopSkipJump - method: evasion - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - 
initialize: - clip_values: - - 0.0 - - 255.0 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 10 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 100 - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - files: - _target_: deckard.base.files.FileConfig - adv_predictions_file: adv_predictions.json - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: cifar - model_dir: models - model_file: model - model_type: .pt - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0.0 - - 255.0 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar10 - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - 
with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 10 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 100 - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: cifar/attacks/attack.pkl - hash: md5 - md5: 4c6d7b56c319a2a3a8f4288873141a44 - size: 123046 - - path: cifar/reports/attack/default/adv_predictions.json - hash: md5 - md5: 0e905b6a95defafe1472cd1d329ed124 - size: 2136 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - attacks@ResNet101: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet101 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar/reports/attack/ResNet101.db - --config-name cifar10.yaml - deps: - - path: attacks.sh - hash: md5 - md5: 963c858a322d7a4990a92a25d5684c57 - size: 2907 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - - path: models.sh - hash: md5 - md5: 02a4961b4afe7ba84c41e9ad49c30c83 - size: 2760 - outs: - - path: cifar/reports/attack/ResNet101.db - hash: md5 - md5: 268500e55100c8e2c0de628e8b66b612 - size: 819200 - attacks@ResNet152: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet152 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar/reports/attack/ResNet152.db - --config-name cifar10.yaml - deps: - - path: attacks.sh - hash: md5 - md5: 963c858a322d7a4990a92a25d5684c57 - size: 2907 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - - path: models.sh - hash: md5 - md5: 
02a4961b4afe7ba84c41e9ad49c30c83 - size: 2760 - outs: - - path: cifar/reports/attack/ResNet152.db - hash: md5 - md5: 47684cf7d10b05f6343f58579fd05af3 - size: 249856 - attacks@ResNet18: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet18 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar/reports/attack/ResNet18.db - --config-name cifar10.yaml - deps: - - path: attacks.sh - hash: md5 - md5: 963c858a322d7a4990a92a25d5684c57 - size: 2907 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - - path: models.sh - hash: md5 - md5: 02a4961b4afe7ba84c41e9ad49c30c83 - size: 2760 - outs: - - path: cifar/reports/attack/ResNet18.db - hash: md5 - md5: bf2b93a31c49e96b219c23095504a7f1 - size: 819200 - attacks@ResNet34: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet34 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar/reports/attack/ResNet34.db - --config-name cifar10.yaml - deps: - - path: attacks.sh - hash: md5 - md5: 963c858a322d7a4990a92a25d5684c57 - size: 2907 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - - path: models.sh - hash: md5 - md5: 02a4961b4afe7ba84c41e9ad49c30c83 - size: 2760 - outs: - - path: cifar/reports/attack/ResNet34.db - hash: md5 - md5: 8de8f4dfcda52bb40f206cf3c4977dd5 - size: 819200 - attacks@ResNet50: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet50 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar/reports/attack/ResNet50.db - --config-name cifar10.yaml - deps: - - path: attacks.sh - hash: md5 - md5: 963c858a322d7a4990a92a25d5684c57 - size: 2907 - - path: cifar/reports/attack/default/score_dict.json - hash: md5 - md5: f40b5d8125bf8b6370a94fe65d43cffa - size: 458 - - path: models.sh - hash: md5 - md5: 02a4961b4afe7ba84c41e9ad49c30c83 - size: 2760 - outs: - - path: 
cifar/reports/attack/ResNet50.db - hash: md5 - md5: 8adabcf8a15b13fc20ea31f58ae7388b - size: 1069056 - compile@attack: - cmd: python -m deckard.layers.compile --report_folder cifar/reports/attack --results_file - cifar/reports/attack.csv - deps: - - path: cifar/reports/attack/ - hash: md5 - md5: 8c7a44700932e363fa7482224eac15bb.dir - size: 9973372888 - nfiles: 34873 - - path: cifar/reports/attack/ResNet101.db - hash: md5 - md5: 268500e55100c8e2c0de628e8b66b612 - size: 819200 - - path: cifar/reports/attack/ResNet18.db - hash: md5 - md5: bf2b93a31c49e96b219c23095504a7f1 - size: 819200 - - path: cifar/reports/attack/ResNet34.db - hash: md5 - md5: 8de8f4dfcda52bb40f206cf3c4977dd5 - size: 819200 - - path: cifar/reports/attack/ResNet50.db - hash: md5 - md5: 8adabcf8a15b13fc20ea31f58ae7388b - size: 1069056 + files.directory: cifar + conf/afr.yaml: + covariates: + - adv_fit_time + - accuracy + - train_time + - atk_value + - def_value + - data.sample.random_state + - model_layers + - model.trainer.nb_epoch + - predict_time + log_logistic: + plot: + file: log_logistic_aft.eps + title: Log logistic AFR Model + labels: + 'Intercept: beta_': $\beta$ + 'Intercept: alpha_': $\alpha$ + 'data.sample.random_state: alpha_': Random State + 'atk_value: alpha_': Attack Strength + 'train_time: alpha_': $t_{train}$ + 'predict_proba_time: alpha_': $t_{predict}$ + 'adv_accuracy: alpha_': Adv. Accuracy + 'accuracy: alpha_': Ben. Accuracy + 'adv_fit_time: alpha_': $t_{attack}$ + 'adv_failure_rate: alpha_': $f_{adv.}(t;\theta)$ + 'failure_rate: alpha_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: alpha_': No. 
of Epochs + 'model.trainer.batch_size: alpha_': Batch Size + def_gen: Defence + 'model_layers: alpha_': Layers + 'def_value: alpha_': Defence Strength + 'predict_time: alpha_': $t_{predict}$ + partial_effect: + - file: log_logistic_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_logistic_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + log_normal: + plot: + file: log_normal_aft.eps + title: Log Normal AFR Model + labels: + 'Intercept: sigma_': $\rho$ + 'Intercept: mu_': $\mu$ + 'data.sample.random_state: mu_': Random State + 'atk_value: mu_': Attack Strength + 'train_time: mu_': $t_{train}$ + 'predict_proba_time: mu_': $t_{predict}$ + 'adv_accuracy: mu_': Adv. Accuracy + 'accuracy: mu_': Ben. Accuracy + 'adv_fit_time: mu_': $t_{attack}$ + 'adv_failure_rate: mu_': $f_{adv.}(t;\theta)$ + 'failure_rate: mu_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: mu_': No. 
of Epochs + 'model.trainer.batch_size: mu_': Batch Size + def_gen: Defence + 'model_layers: mu_': Layers + 'def_value: mu_': Defence Strength + 'predict_time: mu_': $t_{predict}$ + partial_effect: + - file: log_normal_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_normal_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + weibull: + plot: + file: weibull_aft.eps + title: Weibull AFR Model + labels: + 'Intercept: rho_': $\rho$ + 'Intercept: lambda_': $\lambda$ + 'data.sample.random_state: lambda_': Random State + 'atk_value: lambda_': Attack Strength + 'train_time: lambda_': $t_{train}$ + 'predict_proba_time: lambda_': $t_{predict}$ + 'adv_accuracy: lambda_': Adv. Accuracy + 'accuracy: lambda_': Ben. Accuracy + 'adv_fit_time: lambda_': $t_{attack}$ + 'adv_failure_rate: lambda_': $f_{adv.}(t;\theta)$ + 'failure_rate: lambda_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: lambda_': No. 
of Epochs + 'model.trainer.batch_size: lambda_': Batch Size + def_gen: Defence + 'model_layers: lambda_': Layers + 'def_value: lambda_': Defence Strength + 'predict_time: lambda_': $t_{predict}$ + partial_effect: + - file: weibull_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: weibull_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' outs: - - path: cifar/reports/attack.csv - hash: md5 - md5: 51d3b7360885430ef15c78e66a85393a - size: 26068128 + - path: cifar/plots/log_logistic_aft.eps + md5: 25c507a344b0589027e50473873485f4 + size: 44483 + - path: cifar/plots/log_logistic_epochs_partial_effect.eps + md5: 58ad2cc4455c7a2e14dd8baf5e0bebbc + size: 43547 + - path: cifar/plots/log_logistic_layers_partial_effect.eps + md5: c1ec42636bdbf6ba5a3279da4ff03226 + size: 46098 + - path: cifar/plots/log_normal_aft.eps + md5: c10aef0b38507ea46d0bcab12edd43d7 + size: 43898 + - path: cifar/plots/log_normal_epochs_partial_effect.eps + md5: c7e71cb7f8673260b6ba330e228f9897 + size: 44094 + - path: cifar/plots/log_normal_layers_partial_effect.eps + md5: c516900f508281933e52f08fc6e4cec3 + size: 46510 + - path: cifar/plots/weibull_aft.eps + md5: 42a59a94ef52a0bd0260cc1fe2113bc3 + size: 41411 + - path: cifar/plots/weibull_epochs_partial_effect.eps + md5: e5397975c6e5b322a41b7c5918e278cf + size: 43063 + - path: cifar/plots/weibull_layers_partial_effect.eps + md5: 00b0191fdd4ac6f407a33374dbb7a06b + size: 45397 plot: cmd: python -m deckard.layers.plots --path cifar/plots/ --file cifar/reports/clean_attack.csv -c 
conf/plots.yaml deps: - path: cifar/reports/clean_attack.csv - hash: md5 - md5: 726a596355273abf4a5172268bf69c62 - size: 14801171 + md5: 81b63f1d5864d65bdaf71aed4ae4c2b0 + size: 14727230 params: params.yaml: files.directory: cifar files.reports: reports conf/plots.yaml: cat_plot: - - file: adv_accuracy_vs_defence_type.pdf + - file: adv_accuracy_vs_defence_type.eps hue: model_name kind: boxen set: @@ -512,7 +307,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_accuracy_vs_defence_type.pdf + - file: ben_accuracy_vs_defence_type.eps hue: model_name kind: boxen titles: Ben. Accuracy vs Defence Type @@ -527,7 +322,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_failures_per_train_time_vs_defence_type.pdf + - file: ben_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -544,11 +339,9 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_train_time_vs_defence_type.pdf + - file: adv_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen - set: - yscale: log titles: $\bar{C}_{adv.}$ vs Defence Type x: def_gen xlabels: Defence Type @@ -561,12 +354,10 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_train_time_vs_attack_type.pdf + - file: adv_failures_per_train_time_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name - set: - yscale: log titles: $\bar{C}_{adv.}$ vs Attack Type x: atk_gen xlabels: Attack Type @@ -579,31 +370,15 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_test_time_vs_defence_type.pdf + - file: adv_failures_per_test_time_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name - titles: $h_{adv}$ vs Defence Type + titles: $f_{adv}$ vs Defence Type x: def_gen xlabels: Defence Type y: adv_failure_rate - ylabels: $h_{adv.}$ - rotation: 90 - hue_order: - - ResNet18 - - ResNet34 - - ResNet50 - - ResNet101 - - ResNet152 - - file: adv_accuracy_vs_defence_type.pdf - hue: model_name - kind: 
boxen - legend_title: Model Name - titles: Adv. Accuracy vs Defence Type - x: def_gen - xlabels: Defence Type - y: adv_accuracy - ylabels: Adv. Ben. Accuracy + ylabels: $f_{adv.}$ rotation: 90 hue_order: - ResNet18 @@ -611,7 +386,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_accuracy_vs_attack_type.pdf + - file: adv_accuracy_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -627,17 +402,17 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_failure_rate_vs_defence_type.pdf + - file: ben_failure_rate_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name set: yscale: log - titles: $h_{ben}(t; \theta)$ vs Defence Type + titles: $f_{ben}(t; \theta)$ vs Defence Type x: def_gen xlabels: Defence Type y: failure_rate - ylabels: $h_{ben}(t; \theta)$ + ylabels: $f_{ben}(t; \theta)$ rotation: 90 hue_order: - ResNet18 @@ -646,7 +421,7 @@ stages: - ResNet101 - ResNet152 line_plot: - - file: def_param_vs_accuracy.pdf + - file: def_param_vs_accuracy.eps hue: def_gen legend: bbox_to_anchor: @@ -660,7 +435,17 @@ stages: y: accuracy y_scale: ylabel: Ben. Accuracy - - file: def_param_vs_adv_accuracy.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_accuracy.eps hue: def_gen legend: bbox_to_anchor: @@ -674,21 +459,41 @@ stages: y: adv_accuracy y_scale: ylabel: Adv. 
Accuracy - - file: def_param_vs_adv_failure_rate.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_failure_rate.eps hue: def_gen legend: bbox_to_anchor: - 1.05 - 1 title: Defence - title: $h_{adv}$ vs Defence Strength + title: $f_{adv}$ vs Defence Strength x: def_value - x_scale: linear + x_scale: log xlabel: Defence Control Parameter y: adv_failure_rate y_scale: log - ylabel: $h_{adv.}$ - - file: atk_param_vs_accuracy.pdf + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: atk_param_vs_accuracy.eps hue: atk_gen legend: bbox_to_anchor: @@ -708,15 +513,17 @@ stages: - HSJ - Pixel - Thresh + errorbar: se + err_style: bars scatter_plot: - x: train_time_per_sample y: adv_failure_rate hue: model_name xlabel: $t_{train}$ - ylabel: $h_{adv}$ - title: $h_{adv}$ vs $t_{train}$ - file: adv_failure_rate_vs_train_time.pdf - y_scale: log + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps + y_scale: linear x_scale: log legend: title: Model Name @@ -730,343 +537,49 @@ stages: - ResNet101 - ResNet152 outs: - - path: cifar/plots/adv_accuracy_vs_attack_type.pdf - hash: md5 - md5: 2ad8aa1137712e792fc9339cd9409d54 - size: 32194 - - path: cifar/plots/adv_accuracy_vs_defence_type.pdf - hash: md5 - md5: 5c408e76ac388859dbb9cad4e2ddf968 - size: 32506 - - path: cifar/plots/adv_failure_rate_vs_train_time.pdf - hash: md5 - md5: 351d7f3bf8b5933c8ee3031447f48658 - size: 80138 - - path: cifar/plots/adv_failures_per_test_time_vs_defence_type.pdf - hash: md5 - md5: 4c1bd0364c0e2d3f372714202c05f911 - size: 39583 - - path: cifar/plots/adv_failures_per_train_time_vs_attack_type.pdf - hash: md5 - md5: 57ff7639a8f432aea0b4195e33d5ba96 - size: 40766 - - path: cifar/plots/adv_failures_per_train_time_vs_defence_type.pdf - hash: md5 - md5: 
ba23e1a31c95519b640bd2ab7edfea4b - size: 36881 - - path: cifar/plots/atk_param_vs_accuracy.pdf - hash: md5 - md5: 3bf276eed7ed8172d48896efd29991ac - size: 20181 - - path: cifar/plots/ben_accuracy_vs_defence_type.pdf - hash: md5 - md5: d182987f8164dc47305649c193506f10 - size: 29771 - - path: cifar/plots/ben_failure_rate_vs_defence_type.pdf - hash: md5 - md5: a4bdbad3210e9da50098d068792673cc - size: 41749 - - path: cifar/plots/ben_failures_per_train_time_vs_defence_type.pdf - hash: md5 - md5: 900f4362e3e0ffa76a6d0f7f99d52051 - size: 36648 - - path: cifar/plots/def_param_vs_accuracy.pdf - hash: md5 - md5: f7426fca89038c2f19da4afc9c41eab4 - size: 19712 - - path: cifar/plots/def_param_vs_adv_accuracy.pdf - hash: md5 - md5: bda0d408b829c40e17e04f768852fd44 - size: 19958 - - path: cifar/plots/def_param_vs_adv_failure_rate.pdf - hash: md5 - md5: 8b2e7a00615d225bdd9cfa0fdd3c7f36 - size: 23544 - afr: - cmd: python -m deckard.layers.afr --dataset cifar --data_file cifar/reports/clean_attack.csv --duration_col - adv_fit_time --dataset mnist --config_file conf/afr.yaml --plots_folder cifar/plots/ - deps: - - path: cifar/plots/adv_accuracy_vs_defence_type.pdf - hash: md5 - md5: 5c408e76ac388859dbb9cad4e2ddf968 - size: 32506 - - path: cifar/reports/clean_attack.csv - hash: md5 - md5: 726a596355273abf4a5172268bf69c62 - size: 14801171 - params: - params.yaml: - files.directory: cifar - conf/afr.yaml: - covariates: - - accuracy - - train_time - - predict_time - - atk_value - - def_value - - data.sample.random_state - - adv_failure_rate - - model_layers - - adv_fit_time - - model.trainer.nb_epoch - log_logistic: - plot: - file: log_logistic_aft.pdf - title: Log logistic AFR Model - labels: - 'Intercept: rho_': $\rho$ - 'Intercept: lambda_': $\lambda$ - 'data.sample.random_state: lambda_': Random State - 'atk_value: lambda_': Attack Strength - 'train_time: lambda_': $t_{train}$ - 'predict_proba_time: lambda_': $t_{predict}$ - 'adv_accuracy: lambda_': Adv. 
Accuracy - 'accuracy: lambda_': Ben. Accuracy - 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ - 'model.trainer.nb_epoch: lambda_': No. of Epochs - 'model.trainer.batch_size: lambda_': Batch Size - def_gen: Defence - partial_effect: - - file: log_logistic_epochs_partial_effect.pdf - covariate_array: model.trainer.nb_epoch - values_array: - - 1 - - 10 - - 25 - - 50 - title: $S(t)$ for Log-Logistic AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: Epochs - labels: - - '1' - - '10' - - '25' - - '50' - - file: log_logistic_layers_partial_effect.pdf - covariate_array: model_layers - values_array: - - 18 - - 34 - - 50 - - 101 - - 152 - title: $S(t)$ for Cox AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: ResNet Layers - labels: - - '18' - - '34' - - '50' - - '101' - - '152' - log_normal: - plot: - file: log_normal_aft.pdf - title: Log Normal AFR Model - labels: - 'Intercept: rho_': $\rho$ - 'Intercept: lambda_': $\lambda$ - 'data.sample.random_state: lambda_': Random State - 'atk_value: lambda_': Attack Strength - 'train_time: lambda_': $t_{train}$ - 'predict_proba_time: lambda_': $t_{predict}$ - 'adv_accuracy: lambda_': Adv. Accuracy - 'accuracy: lambda_': Ben. Accuracy - 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ - 'model.trainer.nb_epoch: lambda_': No. 
of Epochs - 'model.trainer.batch_size: lambda_': Batch Size - def_gen: Defence - partial_effect: - - file: log_normal_epochs_partial_effect.pdf - covariate_array: model.trainer.nb_epoch - values_array: - - 1 - - 10 - - 25 - - 50 - title: $S(t)$ for Log-Normal AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: Epochs - labels: - - '1' - - '10' - - '25' - - '50' - - file: log_normal_layers_partial_effect.pdf - covariate_array: model_layers - values_array: - - 18 - - 34 - - 50 - - 101 - - 152 - title: $S(t)$ for Cox AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: ResNet Layers - labels: - - '18' - - '34' - - '50' - - '101' - - '152' - weibull: - plot: - file: weibull_aft.pdf - title: Weibull AFR Model - labels: - 'Intercept: rho_': $\rho$ - 'Intercept: lambda_': $\lambda$ - 'data.sample.random_state: lambda_': Random State - 'atk_value: lambda_': Attack Strength - 'train_time: lambda_': $t_{train}$ - 'predict_proba_time: lambda_': $t_{predict}$ - 'adv_accuracy: lambda_': Adv. Accuracy - 'accuracy: lambda_': Ben. Accuracy - 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ - 'model.trainer.nb_epoch: lambda_': No. 
of Epochs - 'model.trainer.batch_size: lambda_': Batch Size - def_gen: Defence - partial_effect: - - file: weibull_epochs_partial_effect.pdf - covariate_array: model.trainer.nb_epoch - values_array: - - 1 - - 10 - - 25 - - 50 - title: $S(t)$ for Weibull AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: Epochs - labels: - - '1' - - '10' - - '25' - - '50' - - file: weibull_layers_partial_effect.pdf - covariate_array: model_layers - values_array: - - 18 - - 34 - - 50 - - 101 - - 152 - title: $S(t)$ for Cox AFR - ylabel: Expectation of $S(t)$ - xlabel: Time $T$ (seconds) - legend_kwargs: - title: ResNet Layers - labels: - - '18' - - '34' - - '50' - - '101' - - '152' - outs: - - path: cifar/plots/log_logistic_aft.pdf - hash: md5 - md5: 2b28871d23665f9745e4d8cb1fef3a7c - size: 23397 - - path: cifar/plots/log_logistic_epochs_partial_effect.pdf - hash: md5 - md5: 820bec8e7d11f46984c7642e797c00d1 - size: 27103 - - path: cifar/plots/log_logistic_layers_partial_effect.pdf - hash: md5 - md5: 38c30a96926e95f5cbb810e1922d1ce3 - size: 28955 - - path: cifar/plots/log_normal_aft.pdf - hash: md5 - md5: df4c2cc42d194386fafea2ef78411bf7 - size: 23817 - - path: cifar/plots/log_normal_epochs_partial_effect.pdf - hash: md5 - md5: c3ffc8e94ccd522d1a8737a4e3c1436f - size: 28672 - - path: cifar/plots/log_normal_layers_partial_effect.pdf - hash: md5 - md5: 247006b93ddadc97a3849cfce5d2ea00 - size: 28982 - - path: cifar/plots/weibull_aft.pdf - hash: md5 - md5: a4759653f442f4acb739f80f4e763600 - size: 33056 - - path: cifar/plots/weibull_epochs_partial_effect.pdf - hash: md5 - md5: 0ddb5d5038243152cd0d68d6dffeb88a - size: 28141 - - path: cifar/plots/weibull_layers_partial_effect.pdf - hash: md5 - md5: e80a372b96f8845791f00a8a7b2f571b - size: 28960 + - path: cifar/plots/adv_accuracy_vs_attack_type.eps + md5: 5138387d43aa6b80861beda375a668a4 + size: 108266 + - path: cifar/plots/adv_accuracy_vs_defence_type.eps + md5: 413761c7484aed43f3d3f6f75da81e82 + size: 
101582 + - path: cifar/plots/adv_failure_rate_vs_train_time.eps + md5: 2cdddda5ce788d5813ca55880fdb7a91 + size: 631508 + - path: cifar/plots/adv_failures_per_test_time_vs_defence_type.eps + md5: 23aff41fad24c912f0301f2e986ae96d + size: 121711 + - path: cifar/plots/adv_failures_per_train_time_vs_attack_type.eps + md5: 85efb2307112f220946f0c4b623df366 + size: 120765 + - path: cifar/plots/adv_failures_per_train_time_vs_defence_type.eps + md5: f914efca4568b2f5163b7e8723f0df5c + size: 113874 + - path: cifar/plots/atk_param_vs_accuracy.eps + md5: 35ee04493de38424453b9514c45d66f1 + size: 39212 + - path: cifar/plots/ben_accuracy_vs_defence_type.eps + md5: 61569c543a2891c9fcdc95314924ceb4 + size: 96824 + - path: cifar/plots/ben_failure_rate_vs_defence_type.eps + md5: 792603cf59c01c922408c18d1dfeda3e + size: 122740 + - path: cifar/plots/ben_failures_per_train_time_vs_defence_type.eps + md5: 25f8992268b8eefa2eaa60b73e1f6370 + size: 112749 + - path: cifar/plots/def_param_vs_accuracy.eps + md5: 21b2ace7923bdeb9c5bb950caf2307f1 + size: 38940 + - path: cifar/plots/def_param_vs_adv_accuracy.eps + md5: 43d5d57465019a4067af163be4178fa8 + size: 38631 + - path: cifar/plots/def_param_vs_adv_failure_rate.eps + md5: 2a6cfab767a224847deb07eb1d451b4a + size: 39304 copy_results: - cmd: cp -r cifar/plots/* ~/ml_afr/cifar/ + cmd: mkdir -p ~/ml_afr/cifar/ && cp -r cifar/plots/* ~/ml_afr/cifar/ deps: - path: cifar/plots/ - hash: md5 - md5: 7531488748e5e0453ac9eb36d844097e.dir - size: 15075359 + md5: c22ad5afc78d35f682d247d6bd61643f.dir + size: 16455580 nfiles: 25 - clean@attack: - cmd: python -m deckard.layers.clean_data -i cifar/reports/attack.csv -o cifar/reports/clean_attack.csv - -c conf/clean.yaml - deps: - - path: cifar/reports/attack.csv - hash: md5 - md5: 51d3b7360885430ef15c78e66a85393a - size: 26068128 - params: - params.yaml: - files.directory: cifar - files.reports: reports - conf/clean.yaml: - attacks: - DeepFool: Deep - FastGradientMethod: FGM - HopSkipJump: HSJ - PixelAttack: 
Pixel - ProjectedGradientDescent: PGD - ThresholdAttack: Thresh - defences: - Control: Control - FeatureSqueezing: FSQ - GaussianAugmentation: Gauss-in - GaussianNoise: Gauss-out - HighConfidence: Conf - nb_epoch: Epochs - model_layers: Control - fillna: - Epochs: 20 - params: - Deep: attack.init.kwargs.nb_grads - FGM: attack.init.kwargs.eps - HSJ: attack.init.kwargs.max_iter - Pixel: attack.init.kwargs.th - PGD: attack.init.kwargs.eps - Thresh: attack.init.kwargs.th - Gauss-out: model.art.pipeline.postprocessor.kwargs.scale - Conf: model.art.pipeline.postprocessor.kwargs.cutoff - FSQ: model.art.pipeline.preprocessor.kwargs.bit_depth - Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma - Control: model_layers - Epochs: model.trainer.nb_epoch - outs: - - path: cifar/reports/clean_attack.csv - hash: md5 - md5: 726a596355273abf4a5172268bf69c62 - size: 14801171 diff --git a/examples/pytorch/cifar10/dvc.yaml b/examples/pytorch/cifar10/dvc.yaml index 7f6b417d..6b445d4c 100644 --- a/examples/pytorch/cifar10/dvc.yaml +++ b/examples/pytorch/cifar10/dvc.yaml @@ -12,7 +12,7 @@ vars: - conf/clean.yaml:fillna stages: train: - cmd: python -m deckard.layers.experiment train --config_file cifar10.yaml + cmd: python -m deckard.layers.experiment train --config_file cifar.yaml params: - data - model @@ -29,7 +29,7 @@ stages: metrics: - ${files.directory}/${files.reports}/train/${files.name}/${files.score_dict_file} attack: - cmd: python -m deckard.layers.experiment attack --config_file cifar10.yaml + cmd: python -m deckard.layers.experiment attack --config_file cifar.yaml params: - data - model @@ -55,7 +55,7 @@ stages: # # - ResNet101 # # - ResNet152 # do: # This script configures eazch defence - # cmd: bash models.sh ++model.init.name=torch_example.${item} stage=train ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/train/${item}.db --config-name cifar10.yaml + # cmd: bash models.sh ++model.init.name=torch_example.${item} stage=train 
++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/train/${item}.db --config-name cifar.yaml # deps: # - models.sh # - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} @@ -72,7 +72,7 @@ stages: - ResNet101 - ResNet152 do: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.${item} stage=attack ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/attack/${item}.db --config-name cifar10.yaml + cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.${item} stage=attack ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/attack/${item}.db --config-name cifar.yaml deps: - models.sh # This script configures each defence - attacks.sh # This script configures each attack @@ -94,7 +94,7 @@ stages: - ${files.directory}/${files.reports}/${item}/ResNet34.db - ${files.directory}/${files.reports}/${item}/ResNet50.db - ${files.directory}/${files.reports}/${item}/ResNet101.db - # - ${files.directory}/${files.reports}/${item}/ResNet152.db + - ${files.directory}/${files.reports}/${item}/ResNet152.db outs: - ${files.directory}/${files.reports}/${item}.csv clean: @@ -128,7 +128,6 @@ stages: - ${files.directory}/plots/${cat_plot[5].file} - ${files.directory}/plots/${cat_plot[6].file} - ${files.directory}/plots/${cat_plot[7].file} - - ${files.directory}/plots/${cat_plot[8].file} - ${files.directory}/plots/${line_plot[0].file} - ${files.directory}/plots/${line_plot[1].file} - ${files.directory}/plots/${line_plot[2].file} @@ -142,20 +141,19 @@ stages: - scatter_plot - cat_plot afr: - cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/${files.reports}/clean_attack.csv --duration_col adv_fit_time --dataset mnist --config_file conf/afr.yaml --plots_folder ${files.directory}/plots/ + cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/${files.reports}/clean_attack.csv --target 
adv_accuracy --duration_col predict_time --dataset cifar --config_file conf/afr.yaml --plots_folder ${files.directory}/plots/ deps: - ${files.directory}/${files.reports}/clean_attack.csv - - ${files.directory}/plots/${cat_plot[0].file} plots: - - ${files.directory}/plots/weibull_aft.pdf - - ${files.directory}/plots/weibull_epochs_partial_effect.pdf - - ${files.directory}/plots/weibull_layers_partial_effect.pdf - - ${files.directory}/plots/log_logistic_aft.pdf - - ${files.directory}/plots/log_logistic_epochs_partial_effect.pdf - - ${files.directory}/plots/log_logistic_layers_partial_effect.pdf - - ${files.directory}/plots/log_normal_aft.pdf - - ${files.directory}/plots/log_normal_epochs_partial_effect.pdf - - ${files.directory}/plots/log_normal_layers_partial_effect.pdf + - ${files.directory}/plots/weibull_aft.eps + - ${files.directory}/plots/weibull_epochs_partial_effect.eps + - ${files.directory}/plots/weibull_layers_partial_effect.eps + - ${files.directory}/plots/log_logistic_aft.eps + - ${files.directory}/plots/log_logistic_epochs_partial_effect.eps + - ${files.directory}/plots/log_logistic_layers_partial_effect.eps + - ${files.directory}/plots/log_normal_aft.eps + - ${files.directory}/plots/log_normal_epochs_partial_effect.eps + - ${files.directory}/plots/log_normal_layers_partial_effect.eps params: - files.directory - conf/afr.yaml: @@ -164,6 +162,7 @@ stages: - log_logistic - log_normal copy_results: - cmd: cp -r ${files.directory}/plots/* ~/ml_afr/cifar/ + cmd: mkdir -p ~/ml_afr/cifar/ && cp -r ${files.directory}/plots/* ~/ml_afr/cifar/ deps: - ${files.directory}/plots/ + diff --git a/examples/pytorch/cifar10/models.sh b/examples/pytorch/cifar10/models.sh index 8d64d588..e8e0944f 100644 --- a/examples/pytorch/cifar10/models.sh +++ b/examples/pytorch/cifar10/models.sh @@ -3,36 +3,32 @@ # This script is used to generate the models for the sklearn example. 
# # Default model -echo "python -m deckard.layers.optimise " $@ "--multirun" -python -m deckard.layers.optimise $@ --multirun +echo "python -m deckard.layers.optimise ++model.trainer.nb_epoch=1,10,30,50,100" $@ "--multirun" +python -m deckard.layers.optimise ++model.trainer.nb_epoch=1,10,30,50,100 $@ --multirun # # This line generates the model and adds the FeatureSqueezing preprocessing defence. -# python -m deckard.layers.optimise ++model.art.preprocessor.name=art.defences.preprocessor.FeatureSqueezing +model.art.preprocessor.params.bit_depth=4,8,16,32,64 +model.art.preprocessor.params.clip_values=[0,255] ++hydra.sweeper.study_name=FSQ $@ --multirun - -# # # Gaussian Augmentation (Input) -# python -m deckard.layers.optimise ++model.art.preprocessor.name=art.defences.preprocessor.GaussianAugmentation +model.art.preprocessor.params.sigma=.01,.1,.3,.5,1 +model.art.preprocessor.params.ratio=.5 +model.art.preprocessor.params.augmentation=False ++hydra.sweeper.study_name=gauss-in $@ --multirun - -# # # # Gaussian Noise (Output) -# python -m deckard.layers.optimise ++model.art.postprocessor.name=art.defences.postprocessor.GaussianNoise ++model.art.postprocessor.params.scale=.01,.1,.3,.5,1 ++hydra.sweeper.study_name=gauss-out $@ --multirun - -# # # # High Confidence -# python -m deckard.layers.optimise +model.art.postprocessor.name=art.defences.postprocessor.HighConfidence +model.art.postprocessor.params.cutoff=.1,.3,.5,.9,.99 ++hydra.sweeper.study_name=conf $@ --multirun -#!/bin/bash - -# This script is used to generate the models for the sklearn example. - -# # Default model -echo "python -m deckard.layers.optimise " $@ "--multirun" -python -m deckard.layers.optimise $@ --multirun - -# # This line generates the model and adds the FeatureSqueezing preprocessing defence. 
-# python -m deckard.layers.optimise ++model.art.preprocessor.name=art.defences.preprocessor.FeatureSqueezing +model.art.preprocessor.params.bit_depth=4,8,16,32,64 +model.art.preprocessor.params.clip_values=[0,255] ++hydra.sweeper.study_name=FSQ $@ --multirun - -# # # Gaussian Augmentation (Input) -# python -m deckard.layers.optimise ++model.art.preprocessor.name=art.defences.preprocessor.GaussianAugmentation +model.art.preprocessor.params.sigma=.01,.1,.3,.5,1 +model.art.preprocessor.params.ratio=.5 +model.art.preprocessor.params.augmentation=False ++hydra.sweeper.study_name=gauss-in $@ --multirun - -# # # # Gaussian Noise (Output) -# python -m deckard.layers.optimise ++model.art.postprocessor.name=art.defences.postprocessor.GaussianNoise ++model.art.postprocessor.params.scale=.01,.1,.3,.5,1 ++hydra.sweeper.study_name=gauss-out $@ --multirun - -# # # # High Confidence -# python -m deckard.layers.optimise +model.art.postprocessor.name=art.defences.postprocessor.HighConfidence +model.art.postprocessor.params.cutoff=.1,.3,.5,.9,.99 ++hydra.sweeper.study_name=conf $@ --multirun +# python -m deckard.layers.optimise \ +# ++model.art.preprocessor.name=art.defences.preprocessor.FeatureSqueezing \ +# +model.art.preprocessor.params.bit_depth=4,8,16,32,64 \ +# +model.art.preprocessor.params.clip_values=[0,255] \ +# ++hydra.sweeper.study_name=FSQ $@ --multirun + +# # # # Gaussian Augmentation (Input) +# python -m deckard.layers.optimise \ +# ++model.art.preprocessor.name=art.defences.preprocessor.GaussianAugmentation \ +# +model.art.preprocessor.params.sigma=.01,.1,.3,.5,1 \ +# +model.art.preprocessor.params.ratio=.5 \ +# +model.art.preprocessor.params.augmentation=False \ +# ++hydra.sweeper.study_name=gauss-in $@ --multirun + +# # # # # Gaussian Noise (Output) +# python -m deckard.layers.optimise \ +# ++model.art.postprocessor.name=art.defences.postprocessor.GaussianNoise \ +# ++model.art.postprocessor.params.scale=.01,.1,.3,.5,1 \ +# ++hydra.sweeper.study_name=gauss-out $@ 
--multirun + +# # # # # High Confidence +# python -m deckard.layers.optimise \ +# +model.art.postprocessor.name=art.defences.postprocessor.HighConfidence \ +# +model.art.postprocessor.params.cutoff=.1,.3,.5,.9,.99 \ +# ++hydra.sweeper.study_name=conf $@ --multirun diff --git a/examples/pytorch/cifar100/.dvc/config b/examples/pytorch/cifar100/.dvc/config index e69de29b..4cf322d9 100644 --- a/examples/pytorch/cifar100/.dvc/config +++ b/examples/pytorch/cifar100/.dvc/config @@ -0,0 +1,2 @@ +[core] + autostage = true diff --git a/examples/pytorch/cifar100/conf/afr.yaml b/examples/pytorch/cifar100/conf/afr.yaml index af0b6cbc..a2f65dc9 100644 --- a/examples/pytorch/cifar100/conf/afr.yaml +++ b/examples/pytorch/cifar100/conf/afr.yaml @@ -1,17 +1,20 @@ covariates: + - "adv_fit_time" - "accuracy" - "train_time" - - "predict_time" - "atk_value" - "def_value" - "data.sample.random_state" - - "adv_failure_rate" - "model_layers" - - "adv_fit_time" - - "model.trainer.kwargs.nb_epoch" + - model.trainer.nb_epoch + - predict_time +# - atk_gen +# - def_gen +fillna: + model.trainer.nb_epoch: 20 weibull: plot: - file : weibull_aft.pdf + file : weibull_aft.eps title : Weibull AFR Model labels: "Intercept: rho_": "$\\rho$" @@ -28,78 +31,94 @@ weibull: "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" "model.trainer.batch_size: lambda_": "Batch Size" "def_gen": "Defence" + "model_layers: lambda_" : "Layers" + "def_value: lambda_" : "Defence Strength" + "predict_time: lambda_" : "$t_{predict}$" partial_effect: - - "file": "weibull_epochs_partial_effect.pdf" - "covariate_array": "model.trainer.kwargs.nb_epoch" - "values_array": [1,10,25,50] - "title": "$S(t)$ for Weibull AFR" - "ylabel": "Expectation of $S(t)$" - "xlabel": "Time $T$ (seconds)" - "legend_kwargs": { - "title": "Epochs", - "labels": ["1", "10", "25", "50"] - } -cox: - plot: - file : cox_aft.pdf - title : Cox AFR Model - labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" - "def_gen": "Defence" - partial_effect: - - "file": "cox_epochs_partial_effect.pdf" + - "file": "weibull_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Weibull AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "cox_layers_partial_effect.pdf" - "covariate_array": "model.trainer.nb_epoch" + - "file": "weibull_layers_partial_effect.eps" + "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Weibull AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } +# cox: +# plot: +# file : cox_aft.eps +# title : Cox AFR Model +# labels: +# "Intercept: rho_": "$\\rho$" +# "Intercept: lambda_": "$\\lambda$" +# "data.sample.random_state: lambda_": "Random State" +# "atk_value: lambda_": "Attack Strength" +# "train_time: lambda_": "$t_{train}$" +# "predict_proba_time: lambda_": "$t_{predict}$" +# "adv_accuracy: lambda_": "Adv. Accuracy" +# "accuracy: lambda_": "Ben. Accuracy" +# "adv_fit_time: lambda_": "$t_{attack}$" +# "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" +# "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" +# "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" +# "model.trainer.batch_size: lambda_": "Batch Size" +# "def_gen": "Defence" +# partial_effect: +# - "file": "cox_epochs_partial_effect.eps" +# "covariate_array": "model.trainer.nb_epoch" +# "values_array": [1,10,25,50] +# "title": "$S(t)$ for Cox AFR" +# "ylabel": "Expectation of $S(t)$" +# "xlabel": "Time $T$ (seconds)" +# "legend_kwargs": { +# "title": "Epochs", +# "labels": ["1", "10", "25", "50"] +# } +# - "file": "cox_layers_partial_effect.eps" +# "covariate_array": "model_layers" +# "values_array": [18, 34, 50, 101, 152] +# "title": "$S(t)$ for Cox AFR" +# "ylabel": "Expectation of $S(t)$" +# "xlabel": "Time $T$ (seconds)" +# "legend_kwargs": { +# "title": "Layers", +# "labels": ["18", "34", "50", "101", "152"] +# } log_logistic: plot: - file : log_logistic_aft.pdf + file : log_logistic_aft.eps title : Log logistic AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: beta_": "$\\beta$" + "Intercept: alpha_": "$\\alpha$" + "data.sample.random_state: alpha_": "Random State" + "atk_value: alpha_": "Attack Strength" + "train_time: alpha_": "$t_{train}$" + "predict_proba_time: alpha_": "$t_{predict}$" + "adv_accuracy: alpha_": "Adv. Accuracy" + "accuracy: alpha_": "Ben. Accuracy" + "adv_fit_time: alpha_": "$t_{attack}$" + "adv_failure_rate: alpha_": "$h_{adv.}(t;\\theta)$" + "failure_rate: alpha_": "$h_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: alpha_": "No. 
of Epochs" + "model.trainer.batch_size: alpha_": "Batch Size" "def_gen": "Defence" + "model_layers: alpha_" : "Layers" + "def_value: alpha_" : "Defence Strength" + "predict_time: alpha_" : "$t_{predict}$" partial_effect: - - "file": "log_logistic_epochs_partial_effect.pdf" + - "file": "log_logistic_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Logistic AFR" @@ -109,27 +128,40 @@ log_logistic: "title": "Epochs", "labels": ["1", "10", "25", "50"] } + - "file": "log_logistic_layers_partial_effect.eps" + "covariate_array": "model_layers" + "values_array": [18, 34, 50, 101, 152] + "title": "$S(t)$ for Log Logistic AFR" + "ylabel": "Expectation of $S(t)$" + "xlabel": "Time $T$ (seconds)" + "legend_kwargs": { + "title": "Layers", + "labels": ["18", "34", "50", "101", "152"] + } log_normal: plot: - file : log_normal_aft.pdf + file : log_normal_aft.eps title : Log Normal AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: sigma_": "$\\rho$" + "Intercept: mu_": "$\\mu$" + "data.sample.random_state: mu_": "Random State" + "atk_value: mu_": "Attack Strength" + "train_time: mu_": "$t_{train}$" + "predict_proba_time: mu_": "$t_{predict}$" + "adv_accuracy: mu_": "Adv. Accuracy" + "accuracy: mu_": "Ben. 
Accuracy" + "adv_fit_time: mu_": "$t_{attack}$" + "adv_failure_rate: mu_": "$h_{adv.}(t;\\theta)$" + "failure_rate: mu_": "$h_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: mu_": "No. of Epochs" + "model.trainer.batch_size: mu_": "Batch Size" "def_gen": "Defence" + "model_layers: mu_" : "Layers" + "def_value: mu_" : "Defence Strength" + "predict_time: mu_" : "$t_{predict}$" partial_effect: - - "file": "log_normal_epochs_partial_effect.pdf" + - "file": "log_normal_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Normal AFR" @@ -139,3 +171,14 @@ log_normal: "title": "Epochs", "labels": ["1", "10", "25", "50"] } + - "file": "log_normal_layers_partial_effect.eps" + "covariate_array": "model_layers" + "values_array": [18, 34, 50, 101, 152] + "title": "$S(t)$ for Log Normal AFR" + "ylabel": "Expectation of $S(t)$" + "xlabel": "Time $T$ (seconds)" + "legend_kwargs": { + "title": "Layers", + "labels": ["18", "34", "50", "101", "152"], + "bbox_to_anchor": [1.05, 1], + } diff --git a/examples/pytorch/cifar100/conf/cifar100.yaml b/examples/pytorch/cifar100/conf/cifar100.yaml index 7a7c9b41..4f9da157 100644 --- a/examples/pytorch/cifar100/conf/cifar100.yaml +++ b/examples/pytorch/cifar100/conf/cifar100.yaml @@ -25,7 +25,7 @@ hydra: study_name: control storage: sqlite:///model.db n_jobs: 4 - n_trials : 32 + n_trials : 10 params: ++data.sample.random_state: choice(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) ++model.art.initialize.optimizer.lr: choice(10, 1, 0.1, 0.01, 0.001, .0001, .00001, 0.000001) diff --git a/examples/pytorch/cifar100/conf/clean.yaml b/examples/pytorch/cifar100/conf/clean.yaml index f33ce91f..52f6c912 100644 --- a/examples/pytorch/cifar100/conf/clean.yaml +++ b/examples/pytorch/cifar100/conf/clean.yaml @@ -7,11 +7,11 @@ attacks: ThresholdAttack: Thresh defences: Control: Control - # FeatureSqueezing: FSQ - # GaussianAugmentation: Gauss-in - # GaussianNoise: Gauss-out - # HighConfidence: Conf - 
nb_epoch: Epochs + FeatureSqueezing: FSQ + GaussianAugmentation: Gauss-in + GaussianNoise: Gauss-out + HighConfidence: Conf + nb_epoch : Epochs model_layers: Control params: Deep: attack.init.nb_grads @@ -20,11 +20,15 @@ params: Pixel: attack.init.th PGD: attack.init.eps Thresh: attack.init.th - Gauss-out: model.art.pipeline.postprocessor.scale - Conf: model.art.pipeline.postprocessor.cutoff - FSQ: model.art.pipeline.preprocessor.bit_depth - Gauss-in: model.art.pipeline.preprocessor.sigma + Gauss-out: model.art.postprocessor.params.scale + Conf: model.art.postprocessor.params.cutoff + FSQ: model.art.preprocessor.params.bit_depth + Gauss-in: model.art.preprocessor.params.sigma Control: model_layers Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + Epochs: 10 fillna: - Epochs: 20 + Epochs: 10 diff --git a/examples/pytorch/cifar100/conf/compile.yaml b/examples/pytorch/cifar100/conf/compile.yaml deleted file mode 100644 index 1314aceb..00000000 --- a/examples/pytorch/cifar100/conf/compile.yaml +++ /dev/null @@ -1,33 +0,0 @@ -attacks: - # CarliniL0Method: CW_0 - # CarliniL2Method: CW_2 - # CarliniLInfMethod: CW_inf - DeepFool: Deep - FastGradientMethod: FGM - HopSkipJump: HSJ - PixelAttack: Pixel - ProjectedGradientDescent: PGD - ThresholdAttack: Thresh -defences: - Control: Control - FeatureSqueezing: FSQ - GaussianAugmentation: Gauss-in - GaussianNoise: Gauss-out - HighConfidence: Conf - Epochs: Epochs -params: - # art.attacks.evasion.CarliniL0Method: attack.init.confidence - # art.attacks.evasion.CarliniL2Method: attack.init.confidence - # art.attacks.evasion.CarliniLInfMethod: attack.init.confidence - Deep: attack.init.nb_grads - FGM: attack.init.eps - HSJ: attack.init.max_iter - Pixel: attack.init.th - PGD: attack.init.eps - Thresh: attack.init.th - Gauss-out: model.art.pipeline.postprocessor.scale - Conf: model.art.pipeline.postprocessor.cutoff - FSQ: model.art.pipeline.preprocessor.bit_depth - Gauss-in: 
model.art.pipeline.preprocessor.sigma - Control: model_layers - Epochs: model.trainer.nb_epoch diff --git a/examples/pytorch/cifar100/conf/plots.yaml b/examples/pytorch/cifar100/conf/plots.yaml index 464f53b3..f68955fc 100644 --- a/examples/pytorch/cifar100/conf/plots.yaml +++ b/examples/pytorch/cifar100/conf/plots.yaml @@ -1,5 +1,5 @@ cat_plot: -- file: adv_accuracy_vs_defence_type.pdf +- file: adv_accuracy_vs_defence_type.eps hue: model_name kind: boxen set: @@ -16,7 +16,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_accuracy_vs_defence_type.pdf +- file: ben_accuracy_vs_defence_type.eps hue: model_name kind: boxen titles: Ben. Accuracy vs Defence Type @@ -31,7 +31,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failures_per_train_time_vs_defence_type.pdf +- file: ben_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -48,7 +48,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_defence_type.pdf +- file: adv_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -65,7 +65,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_attack_type.pdf +- file: adv_failures_per_train_time_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -83,15 +83,15 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_test_time_vs_defence_type.pdf +- file: adv_failures_per_test_time_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name - titles: $h_{adv}$ vs Defence Type + titles: $f_{adv}$ vs Defence Type x: def_gen xlabels: Defence Type y: adv_failure_rate - ylabels: $h_{adv.}$ + ylabels: $f_{adv.}$ rotation : 90 hue_order: - ResNet18 @@ -99,23 +99,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_accuracy_vs_defence_type.pdf - hue: model_name - kind: boxen - legend_title: Model Name - titles: Adv. 
Accuracy vs Defence Type - x: def_gen - xlabels: Defence Type - y: adv_accuracy - ylabels: Adv. Ben. Accuracy - rotation : 90 - hue_order: - - ResNet18 - - ResNet34 - - ResNet50 - - ResNet101 - - ResNet152 -- file: adv_accuracy_vs_attack_type.pdf +- file: adv_accuracy_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -131,17 +115,17 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failure_rate_vs_defence_type.pdf +- file: ben_failure_rate_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name set: yscale: log - titles: $h_{ben}(t; \theta)$ vs Defence Type + titles: $f_{ben}(t; \theta)$ vs Defence Type x: def_gen xlabels: Defence Type y: failure_rate - ylabels: $h_{ben}(t; \theta)$ + ylabels: $f_{ben}(t; \theta)$ rotation : 90 hue_order: - ResNet18 @@ -150,7 +134,7 @@ cat_plot: - ResNet101 - ResNet152 line_plot: -- file: def_param_vs_accuracy.pdf +- file: def_param_vs_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Ben. Accuracy vs Defence Strength @@ -160,15 +144,17 @@ line_plot: y: accuracy y_scale: ylabel: Ben. Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_accuracy.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Adv. Accuracy vs Defence Strength @@ -178,33 +164,37 @@ line_plot: y: adv_accuracy y_scale: ylabel: Adv. 
Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_failure_rate.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_failure_rate.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} - title: $h_{adv}$ vs Defence Strength + title: $f_{adv}$ vs Defence Strength x: def_value - x_scale: linear + x_scale: log xlabel: Defence Control Parameter y: adv_failure_rate y_scale: log - ylabel: $h_{adv.}$ - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: atk_param_vs_accuracy.pdf + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: atk_param_vs_accuracy.eps hue: atk_gen legend: {bbox_to_anchor: [1.05, 1]} title: Adv. Accuracy vs Attack Strength @@ -221,16 +211,17 @@ line_plot: - HSJ - Pixel - Thresh - + errorbar: se + err_style: bars scatter_plot: - x: train_time_per_sample y: adv_failure_rate hue: model_name xlabel: $t_{train}$ - ylabel: $h_{adv}$ - title: $h_{adv}$ vs $t_{train}$ - file: adv_failure_rate_vs_train_time.pdf - y_scale: log + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps + y_scale: linear x_scale: log legend: title: Model Name diff --git a/examples/pytorch/cifar100/dvc.lock b/examples/pytorch/cifar100/dvc.lock index 22f99534..70bd1857 100644 --- a/examples/pytorch/cifar100/dvc.lock +++ b/examples/pytorch/cifar100/dvc.lock @@ -1,387 +1,598 @@ schema: '2.0' stages: - train: - cmd: python -m deckard.layers.experiment train --config_file cifar100.yaml + clean@attack: + cmd: python -m deckard.layers.clean_data -i cifar100/reports/attack.csv -o cifar100/reports/clean_attack.csv + -c conf/clean.yaml + deps: + - path: cifar100/reports/attack.csv + md5: 
e1e1b67a591afdb5c9ea8f35003e5394 + size: 36167864 params: params.yaml: - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - files: - _target_: deckard.base.files.FileConfig - adv_predictions_file: adv_predictions.json - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: cifar100 - model_dir: models - model_file: model - model_type: .pt - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0 - - 255 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 10 - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: 
minimize - name: sklearn.metrics.log_loss + files.directory: cifar100 + files.reports: reports + conf/clean.yaml: + attacks: + DeepFool: Deep + FastGradientMethod: FGM + HopSkipJump: HSJ + PixelAttack: Pixel + ProjectedGradientDescent: PGD + ThresholdAttack: Thresh + defences: + Control: Control + FeatureSqueezing: FSQ + GaussianAugmentation: Gauss-in + GaussianNoise: Gauss-out + HighConfidence: Conf + nb_epoch: Epochs + model_layers: Control + fillna: + Epochs: 10 + params: + Deep: attack.init.nb_grads + FGM: attack.init.eps + HSJ: attack.init.max_iter + Pixel: attack.init.th + PGD: attack.init.eps + Thresh: attack.init.th + Gauss-out: model.art.postprocessor.params.scale + Conf: model.art.postprocessor.params.cutoff + FSQ: model.art.preprocessor.params.bit_depth + Gauss-in: model.art.preprocessor.params.sigma + Control: model_layers + Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + Epochs: 10 outs: - - path: cifar100/reports/train/default/predictions.json - md5: aeaec96d96a661f5185062898e19fd0d - size: 24404547 - - path: cifar100/reports/train/default/score_dict.json - md5: f3070a3adef622cd7899e813660957a8 - size: 898 - attack: - cmd: python -m deckard.layers.experiment attack --config_file cifar100.yaml + - path: cifar100/reports/clean_attack.csv + md5: 1e74314d258faf3aa6cd57f4f0aee4d4 + size: 30258589 + afr: + cmd: python -m deckard.layers.afr --dataset cifar100 --data_file cifar100/reports/clean_attack.csv --target + adv_accuracy --duration_col predict_time --dataset cifar100 --config_file conf/afr.yaml + --plots_folder cifar100/plots/ deps: - - path: cifar100/reports/train/default/predictions.json - md5: aeaec96d96a661f5185062898e19fd0d - size: 24404547 + - path: cifar100/reports/clean_attack.csv + md5: 1e74314d258faf3aa6cd57f4f0aee4d4 + size: 30258589 params: params.yaml: - attack: - _target_: deckard.base.attack.Attack - attack_size: 10 - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: 
original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.attack.AttackInitializer - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0 - - 255 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 10 - name: art.attacks.evasion.HopSkipJump - method: evasion - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0 - - 255 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - 
preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 10 - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - files: - _target_: deckard.base.files.FileConfig - adv_predictions_file: adv_predictions.json - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: cifar100 - model_dir: models - model_file: model - model_type: .pt - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0 - - 255 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - name: torch_cifar100 - path: original_data - sample: - random_state: 0 - stratify: true - sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 - library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 10 - scorers: - _target_: 
deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: cifar100/attacks/attack.pkl - md5: 5317760d3c6f266ece07523e98517d46 - size: 123046 - - path: cifar100/reports/attack/default/adv_predictions.json - md5: 18a846aed49541df9c48131c8f8ebc91 - size: 21453 - - path: cifar100/reports/attack/default/score_dict.json - md5: e7b9599098942b56f82f1f66c1c53280 - size: 1166 - attacks@ResNet18: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet18 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar100/reports/attack/ResNet18.db - --config-name cifar100.yaml - deps: - - path: attacks.sh - md5: d78e6d6b697480fbccfb58bd791af506 - size: 2897 - - path: cifar100/reports/attack/default/score_dict.json - md5: 5d129878ac159ae1b3dbd9b135511973 - size: 1172 - - path: models.sh - md5: d477bb16f5082498a19285ded1a782e5 - size: 1522 + files.directory: cifar100 + conf/afr.yaml: + covariates: + - adv_fit_time + - accuracy + - train_time + - atk_value + - def_value + - data.sample.random_state + - model_layers + - model.trainer.nb_epoch + - predict_time + log_logistic: + plot: + file: log_logistic_aft.eps + title: Log logistic AFR Model + labels: + 'Intercept: beta_': $\beta$ + 'Intercept: alpha_': $\alpha$ + 'data.sample.random_state: alpha_': Random State + 'atk_value: alpha_': Attack Strength + 'train_time: alpha_': $t_{train}$ + 'predict_proba_time: alpha_': $t_{predict}$ + 'adv_accuracy: alpha_': Adv. Accuracy + 'accuracy: alpha_': Ben. Accuracy + 'adv_fit_time: alpha_': $t_{attack}$ + 'adv_failure_rate: alpha_': $h_{adv.}(t;\theta)$ + 'failure_rate: alpha_': $h_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: alpha_': No. 
of Epochs + 'model.trainer.batch_size: alpha_': Batch Size + def_gen: Defence + 'model_layers: alpha_': Layers + 'def_value: alpha_': Defence Strength + 'predict_time: alpha_': $t_{predict}$ + partial_effect: + - file: log_logistic_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_logistic_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + log_normal: + plot: + file: log_normal_aft.eps + title: Log Normal AFR Model + labels: + 'Intercept: sigma_': $\rho$ + 'Intercept: mu_': $\mu$ + 'data.sample.random_state: mu_': Random State + 'atk_value: mu_': Attack Strength + 'train_time: mu_': $t_{train}$ + 'predict_proba_time: mu_': $t_{predict}$ + 'adv_accuracy: mu_': Adv. Accuracy + 'accuracy: mu_': Ben. Accuracy + 'adv_fit_time: mu_': $t_{attack}$ + 'adv_failure_rate: mu_': $h_{adv.}(t;\theta)$ + 'failure_rate: mu_': $h_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: mu_': No. 
of Epochs + 'model.trainer.batch_size: mu_': Batch Size + def_gen: Defence + 'model_layers: mu_': Layers + 'def_value: mu_': Defence Strength + 'predict_time: mu_': $t_{predict}$ + partial_effect: + - file: log_normal_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_normal_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + bbox_to_anchor: + - 1.05 + - 1 + weibull: + plot: + file: weibull_aft.eps + title: Weibull AFR Model + labels: + 'Intercept: rho_': $\rho$ + 'Intercept: lambda_': $\lambda$ + 'data.sample.random_state: lambda_': Random State + 'atk_value: lambda_': Attack Strength + 'train_time: lambda_': $t_{train}$ + 'predict_proba_time: lambda_': $t_{predict}$ + 'adv_accuracy: lambda_': Adv. Accuracy + 'accuracy: lambda_': Ben. Accuracy + 'adv_fit_time: lambda_': $t_{attack}$ + 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ + 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: lambda_': No. 
of Epochs + 'model.trainer.batch_size: lambda_': Batch Size + def_gen: Defence + 'model_layers: lambda_': Layers + 'def_value: lambda_': Defence Strength + 'predict_time: lambda_': $t_{predict}$ + partial_effect: + - file: weibull_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: weibull_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' outs: - - path: cifar100/reports/attack/ResNet18.db - md5: 2ba685aa1d1cd9ec621eb3a1b767a090 - size: 4087808 - attacks@ResNet34: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet34 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar100/reports/attack/ResNet34.db - --config-name cifar100.yaml + - path: cifar100/plots/aft_comparison.csv + md5: 9c486aa8ca1b035f2d501971353760b0 + size: 381 + - path: cifar100/plots/aft_comparison.tex + md5: b8d8117defa391250aeaace7a9d0f31d + size: 420 + - path: cifar100/plots/log_logistic_aft.eps + md5: 25570be7c47838516dc2ce6737ea23cf + size: 43150 + - path: cifar100/plots/log_logistic_epochs_partial_effect.eps + md5: dce601a023295267b4219a3d344c6390 + size: 42195 + - path: cifar100/plots/log_logistic_layers_partial_effect.eps + md5: 422b035d3bffc50166597ad26e175400 + size: 43155 + - path: cifar100/plots/log_normal_aft.eps + md5: 2cf3e5852ca6cd9bee9aa66974db1ad7 + size: 44116 + - path: cifar100/plots/log_normal_epochs_partial_effect.eps + md5: 0cb86ef85c693118c8b6abc83a503ac3 + size: 42931 + - path: cifar100/plots/log_normal_layers_partial_effect.eps + md5: 
4386a26119906fa28709501875c95660 + size: 43230 + - path: cifar100/plots/weibull_aft.eps + md5: 9509c6ffd09f15d01d411d76074695d3 + size: 41413 + - path: cifar100/plots/weibull_epochs_partial_effect.eps + md5: 65e05f0fee7247d061ce52014821d3dd + size: 41438 + - path: cifar100/plots/weibull_layers_partial_effect.eps + md5: 8903ae7d7c4fd97247d80abde3febaed + size: 42657 + plot: + cmd: python -m deckard.layers.plots --path cifar100/plots/ --file cifar100/reports/clean_attack.csv + -c conf/plots.yaml deps: - - path: attacks.sh - md5: d78e6d6b697480fbccfb58bd791af506 - size: 2897 - - path: cifar100/reports/attack/default/score_dict.json - md5: 5d129878ac159ae1b3dbd9b135511973 - size: 1172 - - path: models.sh - md5: d477bb16f5082498a19285ded1a782e5 - size: 1522 + - path: cifar100/reports/clean_attack.csv + md5: 1e74314d258faf3aa6cd57f4f0aee4d4 + size: 30258589 + params: + params.yaml: + files.directory: cifar100 + files.reports: reports + conf/plots.yaml: + cat_plot: + - file: adv_accuracy_vs_defence_type.eps + hue: model_name + kind: boxen + set: + yscale: linear + titles: Adv. Accuracy vs Defence Type + x: def_gen + xlabels: Defence Type + y: adv_accuracy + ylabels: Adv. Accuracy + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: ben_accuracy_vs_defence_type.eps + hue: model_name + kind: boxen + titles: Ben. Accuracy vs Defence Type + x: def_gen + xlabels: Defence Type + y: accuracy + ylabels: Ben. 
Accuracy + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: ben_failures_per_train_time_vs_defence_type.eps + hue: model_name + kind: boxen + set: + yscale: log + titles: $\bar{C}_{ben.}$ vs Defence Type + x: def_gen + xlabels: Defence Type + y: training_time_per_failure + ylabels: $\bar{C}_{ben.}$ + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: adv_failures_per_train_time_vs_defence_type.eps + hue: model_name + kind: boxen + set: + yscale: log + titles: $\bar{C}_{adv.}$ vs Defence Type + x: def_gen + xlabels: Defence Type + y: training_time_per_adv_failure + ylabels: $\bar{C}_{adv.}$ + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: adv_failures_per_train_time_vs_attack_type.eps + hue: model_name + kind: boxen + legend_title: Model Name + set: + yscale: log + titles: $\bar{C}_{adv.}$ vs Attack Type + x: atk_gen + xlabels: Attack Type + y: training_time_per_adv_failure + ylabels: $\bar{C}_{adv.}$ + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: adv_failures_per_test_time_vs_defence_type.eps + hue: model_name + kind: boxen + legend_title: Model Name + titles: $f_{adv}$ vs Defence Type + x: def_gen + xlabels: Defence Type + y: adv_failure_rate + ylabels: $f_{adv.}$ + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: adv_accuracy_vs_attack_type.eps + hue: model_name + kind: boxen + legend_title: Model Name + titles: Adv. Accuracy vs Attack Type + x: atk_gen + xlabels: Attack Type + y: adv_accuracy + ylabels: Adv. 
Accuracy + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + - file: ben_failure_rate_vs_defence_type.eps + hue: model_name + kind: boxen + legend_title: Model Name + set: + yscale: log + titles: $f_{ben}(t; \theta)$ vs Defence Type + x: def_gen + xlabels: Defence Type + y: failure_rate + ylabels: $f_{ben}(t; \theta)$ + rotation: 90 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 + line_plot: + - file: def_param_vs_accuracy.eps + hue: def_gen + legend: + bbox_to_anchor: + - 1.05 + - 1 + title: Defence + title: Ben. Accuracy vs Defence Strength + x: def_value + x_scale: linear + xlabel: Defence Control Parameter + y: accuracy + y_scale: + ylabel: Ben. Accuracy + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_accuracy.eps + hue: def_gen + legend: + bbox_to_anchor: + - 1.05 + - 1 + title: Defence + title: Adv. Accuracy vs Defence Strength + x: def_value + x_scale: linear + xlabel: Defence Control Parameter + y: adv_accuracy + y_scale: + ylabel: Adv. Accuracy + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_failure_rate.eps + hue: def_gen + legend: + bbox_to_anchor: + - 1.05 + - 1 + title: Defence + title: $f_{adv}$ vs Defence Strength + x: def_value + x_scale: log + xlabel: Defence Control Parameter + y: adv_failure_rate + y_scale: log + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: atk_param_vs_accuracy.eps + hue: atk_gen + legend: + bbox_to_anchor: + - 1.05 + - 1 + title: Adv. Accuracy vs Attack Strength + x: atk_value + x_scale: linear + xlabel: Attack Control Parameter + y: adv_accuracy + y_scale: + ylabel: Adv. 
Accuracy + hue_order: + - FGM + - PGD + - Deep + - HSJ + - Pixel + - Thresh + errorbar: se + err_style: bars + scatter_plot: + - x: train_time_per_sample + y: adv_failure_rate + hue: model_name + xlabel: $t_{train}$ + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps + y_scale: linear + x_scale: log + legend: + title: Model Name + bbox_to_anchor: + - 1.05 + - 1 + hue_order: + - ResNet18 + - ResNet34 + - ResNet50 + - ResNet101 + - ResNet152 outs: - - path: cifar100/reports/attack/ResNet34.db - md5: ed1216c4d9bbfa4f0b6e343783970074 - size: 3174400 - attacks@ResNet50: - cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet50 - stage=attack ++hydra.sweeper.storage=sqlite:///cifar100/reports/attack/ResNet50.db - --config-name cifar100.yaml + - path: cifar100/plots/adv_accuracy_vs_attack_type.eps + md5: c8da4b76e14a314a366460692dc783c5 + size: 110928 + - path: cifar100/plots/adv_accuracy_vs_defence_type.eps + md5: 28fc6c3ec076122c0817ff72bf445724 + size: 89728 + - path: cifar100/plots/adv_failure_rate_vs_train_time.eps + md5: 6b120de323dda22b1d1f7c865a3fee40 + size: 1197829 + - path: cifar100/plots/adv_failures_per_test_time_vs_defence_type.eps + md5: 992a14b470a5297898effaacef21da58 + size: 117343 + - path: cifar100/plots/adv_failures_per_train_time_vs_attack_type.eps + md5: da342b3a5f817de813e3831fb704a137 + size: 135357 + - path: cifar100/plots/adv_failures_per_train_time_vs_defence_type.eps + md5: 02573bd82b9a8e91f222b223265d4c88 + size: 110305 + - path: cifar100/plots/atk_param_vs_accuracy.eps + md5: 3a9ee8b1f8d7632d5e5346ee98fd7302 + size: 38228 + - path: cifar100/plots/ben_accuracy_vs_defence_type.eps + md5: a87ff0bfe4abeb62c8a11b14625094c6 + size: 100131 + - path: cifar100/plots/ben_failure_rate_vs_defence_type.eps + md5: 353409db77a5e01b3e1cace395724747 + size: 118669 + - path: cifar100/plots/ben_failures_per_train_time_vs_defence_type.eps + md5: c5b1247944a237ece405e2cc6d8d5f17 + size: 
109170 + - path: cifar100/plots/def_param_vs_accuracy.eps + md5: a1d1bca25b228523ca3518e85407d9f9 + size: 38880 + - path: cifar100/plots/def_param_vs_adv_accuracy.eps + md5: 361afc1a730165a3c05d80ad16fe4c66 + size: 37677 + - path: cifar100/plots/def_param_vs_adv_failure_rate.eps + md5: cd8cc762d8541f690335ea6118eff227 + size: 41408 + copy_results: + cmd: mkdir -p ~/ml_afr/cifar100 && cp -r cifar100/plots/* ~/ml_afr/cifar100 deps: - - path: attacks.sh - md5: d78e6d6b697480fbccfb58bd791af506 - size: 2897 - - path: cifar100/reports/attack/default/score_dict.json - md5: 5d129878ac159ae1b3dbd9b135511973 - size: 1172 - - path: models.sh - md5: d477bb16f5082498a19285ded1a782e5 - size: 1522 - outs: - - path: cifar100/reports/attack/ResNet50.db - md5: 4915a611bc8eed14feb13e4e5fcf7bc1 - size: 2863104 + - path: cifar100/plots/ + md5: 5862826c5918c815b4814de0b1d5e53b.dir + size: 2876717 + nfiles: 27 diff --git a/examples/pytorch/cifar100/dvc.yaml b/examples/pytorch/cifar100/dvc.yaml index 140eff79..60c91cf5 100644 --- a/examples/pytorch/cifar100/dvc.yaml +++ b/examples/pytorch/cifar100/dvc.yaml @@ -4,7 +4,6 @@ vars: - conf/plots.yaml:cat_plot - conf/afr.yaml:covariates - conf/afr.yaml:weibull - - conf/afr.yaml:cox - conf/afr.yaml:log_logistic - conf/afr.yaml:log_normal - conf/clean.yaml:attacks @@ -20,7 +19,13 @@ stages: - scorers - files outs: + - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type} + - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} + - ${files.directory}/${files.model_dir}/${files.model_file}.optimizer${files.model_type} + # - ${files.directory}/${files.reports}/train/${files.name}/${files.params_file} + # - ${files.directory}/${files.reports}/train/${files.name}/${files.test_labels_file} # Omit to save space - ${files.directory}/${files.reports}/train/${files.name}/${files.predictions_file} # logit outputs for our model + # - 
${files.directory}/${files.reports}/train/${files.name}/${files.probabilities_file} # Omit to save space metrics: - ${files.directory}/${files.reports}/train/${files.name}/${files.score_dict_file} attack: @@ -34,25 +39,27 @@ stages: outs: - ${files.directory}/${files.attack_dir}/${files.attack_file}${files.attack_type} - ${files.directory}/${files.reports}/attack/${files.name}/${files.adv_predictions_file} + # - ${files.directory}/${files.reports}/attack/${files.name}/${files.params_file} deps: - - ${files.directory}/${files.reports}/train/${files.name}/${files.predictions_file} + - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type} + - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} metrics: - ${files.directory}/${files.reports}/attack/${files.name}/${files.score_dict_file} ############################################################################## - # models: - # foreach: # This is a loop over the ResNet models + # models: # This is a loop over the ResNet models + # foreach: # - ResNet18 - # - ResNet34 - # - ResNet50 - # - ResNet101 - # - ResNet152 - # do: - # cmd: bash models.sh ++attack.attack_size=100 ++model.init.name=torch_example.${item} stage=train ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/train/${item}.db --config-name cifar100.yaml + # # - ResNet34 + # # - ResNet50 + # # - ResNet101 + # # - ResNet152 + # do: # This script configures eazch defence + # cmd: bash models.sh ++model.init.name=torch_example.${item} stage=train ++hydra.sweeper.storage=sqlite:///${files.directory}/${files.reports}/train/${item}.db --config-name cifar100.yaml # deps: - # - models.sh # This script configures each defence - # - attacks.sh # This script configures each attack - # - ${files.directory}/${files.reports}/train/${files.name}/${files.score_dict_file} # This is here just to ensure it runs after the attack stage + # - models.sh + # - 
${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} + # - ${files.directory}/${files.model_dir}/${files.model_file}.optimizer${files.model_type} # outs: # - ${files.directory}/${files.reports}/train/${item}.db: # This outputs a database file for each model # cache: True @@ -121,7 +128,6 @@ stages: - ${files.directory}/plots/${cat_plot[5].file} - ${files.directory}/plots/${cat_plot[6].file} - ${files.directory}/plots/${cat_plot[7].file} - - ${files.directory}/plots/${cat_plot[8].file} - ${files.directory}/plots/${line_plot[0].file} - ${files.directory}/plots/${line_plot[1].file} - ${files.directory}/plots/${line_plot[2].file} @@ -135,18 +141,19 @@ stages: - scatter_plot - cat_plot afr: - cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/plots/clean_attack.csv --target adv_accuracy --duration_col adv_fit_time --dataset cifar100 --config_file conf/afr.yaml + cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/${files.reports}/clean_attack.csv --target adv_accuracy --duration_col predict_time --dataset cifar100 --config_file conf/afr.yaml --plots_folder ${files.directory}/plots/ deps: - - ${files.directory}/plots/clean_attack.csv + - ${files.directory}/${files.reports}/clean_attack.csv plots: - - ${files.directory}/plots/weibull_aft.pdf - - ${files.directory}/plots/weibull_partial_effects.pdf - - ${files.directory}/plots/cox_partial_effects.pdf - - ${files.directory}/plots/cox_aft.pdf - - ${files.directory}/plots/log_logistic_aft.pdf - - ${files.directory}/plots/log_logistic_partial_effects.pdf - - ${files.directory}/plots/log_normal_aft.pdf - - ${files.directory}/plots/log_normal_partial_effects.pdf + - ${files.directory}/plots/weibull_aft.eps + - ${files.directory}/plots/weibull_epochs_partial_effect.eps + - ${files.directory}/plots/weibull_layers_partial_effect.eps + - ${files.directory}/plots/log_logistic_aft.eps + - 
${files.directory}/plots/log_logistic_epochs_partial_effect.eps + - ${files.directory}/plots/log_logistic_layers_partial_effect.eps + - ${files.directory}/plots/log_normal_aft.eps + - ${files.directory}/plots/log_normal_epochs_partial_effect.eps + - ${files.directory}/plots/log_normal_layers_partial_effect.eps metrics: - ${files.directory}/plots/aft_comparison.csv outs: @@ -156,10 +163,9 @@ stages: - conf/afr.yaml: - covariates - weibull - - cox - log_logistic - log_normal copy_results: - cmd: cp -r ${files.directory}/plots/* ~/ml_afr/cifar100 + cmd: mkdir -p ~/ml_afr/cifar100 && cp -r ${files.directory}/plots/* ~/ml_afr/cifar100 deps: - ${files.directory}/plots/ diff --git a/examples/pytorch/cifar100/models.sh b/examples/pytorch/cifar100/models.sh index da6eaee5..da7204d3 100644 --- a/examples/pytorch/cifar100/models.sh +++ b/examples/pytorch/cifar100/models.sh @@ -3,10 +3,10 @@ # This script is used to generate the models for the sklearn example. # # Default model -# echo "python -m deckard.layers.optimise ++model.trainer.nb_epoch:1,10,30,50,100" $@ "--multirun" -# python -m deckard.layers.optimise ++model.trainer.nb_epoch:1,10,30,50,100 $@ --multirun +echo "python -m deckard.layers.optimise ++model.trainer.nb_epoch=1,10,30,50,100" $@ "--multirun" +python -m deckard.layers.optimise ++model.trainer.nb_epoch=1,10,30,50,100 $@ --multirun -# # This line generates the model and adds the FeatureSqueezing preprocessing defence. +# This line generates the model and adds the FeatureSqueezing preprocessing defence. 
python -m deckard.layers.optimise \ ++model.art.preprocessor.name=art.defences.preprocessor.FeatureSqueezing \ +model.art.preprocessor.params.bit_depth=4,8,16,32,64 \ @@ -24,7 +24,7 @@ python -m deckard.layers.optimise \ # # # # Gaussian Noise (Output) python -m deckard.layers.optimise \ ++model.art.postprocessor.name=art.defences.postprocessor.GaussianNoise \ - ++model.art.postprocessor.params.scale=.01,.1,.3,.5,1 \W + ++model.art.postprocessor.params.scale=.01,.1,.3,.5,1 \ ++hydra.sweeper.study_name=gauss-out $@ --multirun # # # # High Confidence diff --git a/examples/pytorch/cifar100/wait.sh b/examples/pytorch/cifar100/wait.sh new file mode 100644 index 00000000..7acc5e65 --- /dev/null +++ b/examples/pytorch/cifar100/wait.sh @@ -0,0 +1,13 @@ +#!/bin/bash +rm -rf waiting.log || true +echo "Trying to allocate gpu" +start=$(date +%s) +until gpu-allocate-cli allocate --duration 72h --wait && echo "Elapsed time: $(( $( date +%s ) - $start )) seconds" >| waiting.log && dvc repro +do + echo "Waiting 30 mins" + sleep 1800 + echo "Trying to allocate gpu" + echo "Elapsed time: $(( $( date +%s ) - $start )) seconds" + echo "Elapsed time in hours: $(( ($(date +%s) - $start) / 3600 )) hours" + echo "Elapsed time in days: $(( ($(date +%s) - $start) / 86400 )) days" +done diff --git a/examples/pytorch/main.sh b/examples/pytorch/main.sh new file mode 100644 index 00000000..34e724ac --- /dev/null +++ b/examples/pytorch/main.sh @@ -0,0 +1,17 @@ +#!/bin/bash +paper_dir=~/ml_afr/ +# set downstream to 2 or nothing +for d in */ ; do + cd $d + # run command and write to log file + dvc repro --downstream clean -f >| dvc_repro.log + # dvc push + cd - +done +# change to paper directory +cd $paper_dir +# run dvc repro and dvc push +dvc repro +dvc push +# change back to original directory +cd - diff --git a/examples/pytorch/mnist/.dvc/config b/examples/pytorch/mnist/.dvc/config index e69de29b..4cf322d9 100644 --- a/examples/pytorch/mnist/.dvc/config +++ b/examples/pytorch/mnist/.dvc/config 
@@ -0,0 +1,2 @@ +[core] + autostage = true diff --git a/examples/pytorch/mnist/conf/afr.yaml b/examples/pytorch/mnist/conf/afr.yaml index ae09b33b..3585a28e 100644 --- a/examples/pytorch/mnist/conf/afr.yaml +++ b/examples/pytorch/mnist/conf/afr.yaml @@ -1,19 +1,20 @@ covariates: + - "adv_fit_time" - "accuracy" - "train_time" - - "predict_time" - "atk_value" - "def_value" - "data.sample.random_state" - - "adv_failure_rate" - - "model_layers" - - "adv_fit_time" - model.trainer.nb_epoch + - "model_layers" +# - atk_gen +# - def_gen + - predict_time fillna: model.trainer.nb_epoch: 20 weibull: plot: - file : weibull_aft.pdf + file : weibull_aft.eps title : Weibull AFR Model labels: "Intercept: rho_": "$\\rho$" @@ -25,13 +26,16 @@ weibull: "adv_accuracy: lambda_": "Adv. Accuracy" "accuracy: lambda_": "Ben. Accuracy" "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" + "adv_failure_rate: lambda_": "$f_{adv.}(t;\\theta)$" + "failure_rate: lambda_": "$f_{ben.}(t;\\theta)$" "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" "model.trainer.batch_size: lambda_": "Batch Size" "def_gen": "Defence" + "model_layers: lambda_" : "Layers" + "def_value: lambda_" : "Defence Strength" + "predict_time: lambda_" : "$t_{predict}$" partial_effect: - - "file": "weibull_epochs_partial_effect.pdf" + - "file": "weibull_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Weibull AFR" @@ -41,19 +45,19 @@ weibull: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "weibull_layers_partial_effect.pdf" + - "file": "weibull_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Weibull AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } # cox: # plot: -# file : cox_aft.pdf +# file : cox_aft.eps # title : Cox AFR Model # labels: # "Intercept: rho_": "$\\rho$" @@ -65,13 +69,13 @@ weibull: # "adv_accuracy: lambda_": "Adv. Accuracy" # "accuracy: lambda_": "Ben. Accuracy" # "adv_fit_time: lambda_": "$t_{attack}$" -# "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" -# "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" +# "adv_failure_rate: lambda_": "$f_{adv.}(t;\\theta)$" +# "failure_rate: lambda_": "$f_{ben.}(t;\\theta)$" # "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" # "model.trainer.batch_size: lambda_": "Batch Size" # "def_gen": "Defence" # partial_effect: -# - "file": "cox_epochs_partial_effect.pdf" +# - "file": "cox_epochs_partial_effect.eps" # "covariate_array": "model.trainer.nb_epoch" # "values_array": [1,10,25,50] # "title": "$S(t)$ for Cox AFR" @@ -81,37 +85,40 @@ weibull: # "title": "Epochs", # "labels": ["1", "10", "25", "50"] # } -# - "file": "cox_layers_partial_effect.pdf" +# - "file": "cox_layers_partial_effect.eps" # "covariate_array": "model_layers" # "values_array": [18, 34, 50, 101, 152] # "title": "$S(t)$ for Cox AFR" # "ylabel": "Expectation of $S(t)$" # "xlabel": "Time $T$ (seconds)" # "legend_kwargs": { -# "title": "ResNet Layers", +# "title": "Layers", # "labels": ["18", "34", "50", "101", "152"] # } log_logistic: plot: - file : log_logistic_aft.pdf + file : log_logistic_aft.eps title : Log logistic AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: beta_": "$\\beta$" + "Intercept: alpha_": "$\\alpha$" + "data.sample.random_state: alpha_": "Random State" + "atk_value: alpha_": "Attack Strength" + "train_time: alpha_": "$t_{train}$" + "predict_proba_time: alpha_": "$t_{predict}$" + "adv_accuracy: alpha_": "Adv. Accuracy" + "accuracy: alpha_": "Ben. 
Accuracy" + "adv_fit_time: alpha_": "$t_{attack}$" + "adv_failure_rate: alpha_": "$f_{adv.}(t;\\theta)$" + "failure_rate: alpha_": "$f_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: alpha_": "No. of Epochs" + "model.trainer.batch_size: alpha_": "Batch Size" "def_gen": "Defence" + "model_layers: alpha_" : "Layers" + "def_value: alpha_" : "Defence Strength" + "predict_time: alpha_" : "$t_{predict}$" partial_effect: - - "file": "log_logistic_epochs_partial_effect.pdf" + - "file": "log_logistic_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Logistic AFR" @@ -121,37 +128,40 @@ log_logistic: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "log_logistic_layers_partial_effect.pdf" + - "file": "log_logistic_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Log Logistic AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } log_normal: plot: - file : log_normal_aft.pdf + file : log_normal_aft.eps title : Log Normal AFR Model labels: - "Intercept: rho_": "$\\rho$" - "Intercept: lambda_": "$\\lambda$" - "data.sample.random_state: lambda_": "Random State" - "atk_value: lambda_": "Attack Strength" - "train_time: lambda_": "$t_{train}$" - "predict_proba_time: lambda_": "$t_{predict}$" - "adv_accuracy: lambda_": "Adv. Accuracy" - "accuracy: lambda_": "Ben. Accuracy" - "adv_fit_time: lambda_": "$t_{attack}$" - "adv_failure_rate: lambda_": "$h_{adv.}(t;\\theta)$" - "failure_rate: lambda_": "$h_{ben.}(t;\\theta)$" - "model.trainer.nb_epoch: lambda_": "No. 
of Epochs" - "model.trainer.batch_size: lambda_": "Batch Size" + "Intercept: sigma_": "$\\rho$" + "Intercept: mu_": "$\\mu$" + "data.sample.random_state: mu_": "Random State" + "atk_value: mu_": "Attack Strength" + "train_time: mu_": "$t_{train}$" + "predict_proba_time: mu_": "$t_{predict}$" + "adv_accuracy: mu_": "Adv. Accuracy" + "accuracy: mu_": "Ben. Accuracy" + "adv_fit_time: mu_": "$t_{attack}$" + "adv_failure_rate: mu_": "$f_{adv.}(t;\\theta)$" + "failure_rate: mu_": "$f_{ben.}(t;\\theta)$" + "model.trainer.nb_epoch: mu_": "No. of Epochs" + "model.trainer.batch_size: mu_": "Batch Size" "def_gen": "Defence" + "model_layers: mu_" : "Layers" + "def_value: mu_" : "Defence Strength" + "predict_time: mu_" : "$t_{predict}$" partial_effect: - - "file": "log_normal_epochs_partial_effect.pdf" + - "file": "log_normal_epochs_partial_effect.eps" "covariate_array": "model.trainer.nb_epoch" "values_array": [1,10,25,50] "title": "$S(t)$ for Log-Normal AFR" @@ -161,13 +171,13 @@ log_normal: "title": "Epochs", "labels": ["1", "10", "25", "50"] } - - "file": "log_normal_layers_partial_effect.pdf" + - "file": "log_normal_layers_partial_effect.eps" "covariate_array": "model_layers" "values_array": [18, 34, 50, 101, 152] - "title": "$S(t)$ for Cox AFR" + "title": "$S(t)$ for Log Normal AFR" "ylabel": "Expectation of $S(t)$" "xlabel": "Time $T$ (seconds)" "legend_kwargs": { - "title": "ResNet Layers", + "title": "Layers", "labels": ["18", "34", "50", "101", "152"] } diff --git a/examples/pytorch/mnist/conf/clean.yaml b/examples/pytorch/mnist/conf/clean.yaml index 7843aba8..e9c215ab 100644 --- a/examples/pytorch/mnist/conf/clean.yaml +++ b/examples/pytorch/mnist/conf/clean.yaml @@ -11,7 +11,7 @@ defences: GaussianAugmentation: Gauss-in GaussianNoise: Gauss-out HighConfidence: Conf - nb_epoch: Epochs + nb_epoch : Epochs model_layers: Control params: Deep: attack.init.kwargs.nb_grads @@ -26,5 +26,9 @@ params: Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma Control: 
model_layers Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + model.trainer.nb_epoch: 20 fillna: - Epochs: 20 + model.trainer.nb_epoch : 20 diff --git a/examples/pytorch/mnist/conf/plots.yaml b/examples/pytorch/mnist/conf/plots.yaml index 464f53b3..b684aec3 100644 --- a/examples/pytorch/mnist/conf/plots.yaml +++ b/examples/pytorch/mnist/conf/plots.yaml @@ -1,5 +1,5 @@ cat_plot: -- file: adv_accuracy_vs_defence_type.pdf +- file: adv_accuracy_vs_defence_type.eps hue: model_name kind: boxen set: @@ -16,7 +16,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_accuracy_vs_defence_type.pdf +- file: ben_accuracy_vs_defence_type.eps hue: model_name kind: boxen titles: Ben. Accuracy vs Defence Type @@ -31,7 +31,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failures_per_train_time_vs_defence_type.pdf +- file: ben_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -48,7 +48,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_defence_type.pdf +- file: adv_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -65,7 +65,7 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_train_time_vs_attack_type.pdf +- file: adv_failures_per_train_time_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -83,15 +83,15 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_failures_per_test_time_vs_defence_type.pdf +- file: adv_failures_per_test_time_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name - titles: $h_{adv}$ vs Defence Type + titles: $f_{adv}$ vs Defence Type x: def_gen xlabels: Defence Type y: adv_failure_rate - ylabels: $h_{adv.}$ + ylabels: $f_{adv.}$ rotation : 90 hue_order: - ResNet18 @@ -99,23 +99,8 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: adv_accuracy_vs_defence_type.pdf - hue: model_name - kind: boxen - legend_title: Model Name - titles: Adv. 
Accuracy vs Defence Type - x: def_gen - xlabels: Defence Type - y: adv_accuracy - ylabels: Adv. Ben. Accuracy - rotation : 90 - hue_order: - - ResNet18 - - ResNet34 - - ResNet50 - - ResNet101 - - ResNet152 -- file: adv_accuracy_vs_attack_type.pdf + +- file: adv_accuracy_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -131,17 +116,17 @@ cat_plot: - ResNet50 - ResNet101 - ResNet152 -- file: ben_failure_rate_vs_defence_type.pdf +- file: ben_failure_rate_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name set: yscale: log - titles: $h_{ben}(t; \theta)$ vs Defence Type + titles: $f_{ben}(t; \theta)$ vs Defence Type x: def_gen xlabels: Defence Type y: failure_rate - ylabels: $h_{ben}(t; \theta)$ + ylabels: $f_{ben}(t; \theta)$ rotation : 90 hue_order: - ResNet18 @@ -150,7 +135,7 @@ cat_plot: - ResNet101 - ResNet152 line_plot: -- file: def_param_vs_accuracy.pdf +- file: def_param_vs_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Ben. Accuracy vs Defence Strength @@ -160,15 +145,17 @@ line_plot: y: accuracy y_scale: ylabel: Ben. Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_accuracy.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_accuracy.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} title: Adv. Accuracy vs Defence Strength @@ -178,33 +165,37 @@ line_plot: y: adv_accuracy y_scale: ylabel: Adv. 
Accuracy - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: def_param_vs_adv_failure_rate.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: def_param_vs_adv_failure_rate.eps hue: def_gen legend: {"bbox_to_anchor": [1.05, 1], "title": "Defence"} - title: $h_{adv}$ vs Defence Strength + title: $f_{adv}$ vs Defence Strength x: def_value - x_scale: linear + x_scale: linear xlabel: Defence Control Parameter y: adv_failure_rate - y_scale: log - ylabel: $h_{adv.}$ - # hue_order: - # - Control - # - Conf - # - Epochs - # - Gauss-in - # - Gauss-out - # - Conf - # - FSQ -- file: atk_param_vs_accuracy.pdf + y_scale: linear + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars +- file: atk_param_vs_accuracy.eps hue: atk_gen legend: {bbox_to_anchor: [1.05, 1]} title: Adv. 
Accuracy vs Attack Strength @@ -221,16 +212,17 @@ line_plot: - HSJ - Pixel - Thresh - + errorbar: se + err_style: bars scatter_plot: - x: train_time_per_sample y: adv_failure_rate hue: model_name xlabel: $t_{train}$ - ylabel: $h_{adv}$ - title: $h_{adv}$ vs $t_{train}$ - file: adv_failure_rate_vs_train_time.pdf - y_scale: log + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps + # y_scale: log x_scale: log legend: title: Model Name diff --git a/examples/pytorch/mnist/dvc.lock b/examples/pytorch/mnist/dvc.lock index a1fb908f..db59a303 100644 --- a/examples/pytorch/mnist/dvc.lock +++ b/examples/pytorch/mnist/dvc.lock @@ -1,46 +1,299 @@ schema: '2.0' stages: - compile@attack: - cmd: python -m deckard.layers.compile --report_folder mnist/reports/attack --results_file - mnist/reports/attack.csv + clean@attack: + cmd: python -m deckard.layers.clean_data -i mnist/reports/attack.csv -o mnist/reports/clean_attack.csv + -c conf/clean.yaml deps: - - path: mnist/reports/attack/ - hash: md5 - md5: a2694f8e9ea5c5ff400d11fa1d98001d.dir - size: 21895572255 - nfiles: 53006 - - path: mnist/reports/attack/ResNet101.db - md5: 600452804d96c8b8483c3f8da01130c4 - size: 462848 - - path: mnist/reports/attack/ResNet18.db - md5: 920b0ed178ec504c0d7990777862283f - size: 1363968 - - path: mnist/reports/attack/ResNet34.db - md5: 3f56dd2ea0783a56a2a9e3eaaad88c21 - size: 1945600 - - path: mnist/reports/attack/ResNet50.db - md5: d9ee221b942b56d9bb720e022e05bf4b - size: 462848 - outs: - path: mnist/reports/attack.csv - hash: md5 - md5: aa61d0f5118327d164cfc37a466e5162 - size: 47376297 + md5: 51d779c26865540247a82408cd6a46d0 + size: 189645846 + params: + params.yaml: + files.directory: mnist + files.reports: reports + conf/clean.yaml: + attacks: + DeepFool: Deep + FastGradientMethod: FGM + HopSkipJump: HSJ + PixelAttack: Pixel + ProjectedGradientDescent: PGD + ThresholdAttack: Thresh + defences: + Control: Control + FeatureSqueezing: FSQ + 
GaussianAugmentation: Gauss-in + GaussianNoise: Gauss-out + HighConfidence: Conf + nb_epoch: Epochs + model_layers: Control + fillna: + model.trainer.nb_epoch: 20 + params: + Deep: attack.init.kwargs.nb_grads + FGM: attack.init.kwargs.eps + HSJ: attack.init.kwargs.max_iter + Pixel: attack.init.kwargs.th + PGD: attack.init.kwargs.eps + Thresh: attack.init.kwargs.th + Gauss-out: model.art.pipeline.postprocessor.kwargs.scale + Conf: model.art.pipeline.postprocessor.kwargs.cutoff + FSQ: model.art.pipeline.preprocessor.kwargs.bit_depth + Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma + Control: model_layers + Epochs: model.trainer.nb_epoch + control: + model_layers: 18 + defaults: + model.trainer.nb_epoch: 20 + outs: + - path: mnist/reports/clean_attack.csv + md5: 2c19e993bd04189f0f47a38eda17f3b9 + size: 43039659 + afr: + cmd: python -m deckard.layers.afr --dataset mnist --data_file mnist/reports/clean_attack.csv --target + adv_accuracy --duration_col predict_time --dataset mnist --config_file conf/afr.yaml + --plots_folder mnist/plots/ + deps: + - path: mnist/reports/clean_attack.csv + md5: 2c19e993bd04189f0f47a38eda17f3b9 + size: 43039659 + params: + params.yaml: + files.directory: mnist + conf/afr.yaml: + covariates: + - adv_fit_time + - accuracy + - train_time + - atk_value + - def_value + - data.sample.random_state + - model.trainer.nb_epoch + - model_layers + - predict_time + log_logistic: + plot: + file: log_logistic_aft.eps + title: Log logistic AFR Model + labels: + 'Intercept: beta_': $\beta$ + 'Intercept: alpha_': $\alpha$ + 'data.sample.random_state: alpha_': Random State + 'atk_value: alpha_': Attack Strength + 'train_time: alpha_': $t_{train}$ + 'predict_proba_time: alpha_': $t_{predict}$ + 'adv_accuracy: alpha_': Adv. Accuracy + 'accuracy: alpha_': Ben. Accuracy + 'adv_fit_time: alpha_': $t_{attack}$ + 'adv_failure_rate: alpha_': $f_{adv.}(t;\theta)$ + 'failure_rate: alpha_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: alpha_': No. 
of Epochs + 'model.trainer.batch_size: alpha_': Batch Size + def_gen: Defence + 'model_layers: alpha_': Layers + 'def_value: alpha_': Defence Strength + 'predict_time: alpha_': $t_{predict}$ + partial_effect: + - file: log_logistic_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_logistic_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Logistic AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + log_normal: + plot: + file: log_normal_aft.eps + title: Log Normal AFR Model + labels: + 'Intercept: sigma_': $\rho$ + 'Intercept: mu_': $\mu$ + 'data.sample.random_state: mu_': Random State + 'atk_value: mu_': Attack Strength + 'train_time: mu_': $t_{train}$ + 'predict_proba_time: mu_': $t_{predict}$ + 'adv_accuracy: mu_': Adv. Accuracy + 'accuracy: mu_': Ben. Accuracy + 'adv_fit_time: mu_': $t_{attack}$ + 'adv_failure_rate: mu_': $f_{adv.}(t;\theta)$ + 'failure_rate: mu_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: mu_': No. 
of Epochs + 'model.trainer.batch_size: mu_': Batch Size + def_gen: Defence + 'model_layers: mu_': Layers + 'def_value: mu_': Defence Strength + 'predict_time: mu_': $t_{predict}$ + partial_effect: + - file: log_normal_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Log-Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: log_normal_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Log Normal AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + weibull: + plot: + file: weibull_aft.eps + title: Weibull AFR Model + labels: + 'Intercept: rho_': $\rho$ + 'Intercept: lambda_': $\lambda$ + 'data.sample.random_state: lambda_': Random State + 'atk_value: lambda_': Attack Strength + 'train_time: lambda_': $t_{train}$ + 'predict_proba_time: lambda_': $t_{predict}$ + 'adv_accuracy: lambda_': Adv. Accuracy + 'accuracy: lambda_': Ben. Accuracy + 'adv_fit_time: lambda_': $t_{attack}$ + 'adv_failure_rate: lambda_': $f_{adv.}(t;\theta)$ + 'failure_rate: lambda_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: lambda_': No. 
of Epochs + 'model.trainer.batch_size: lambda_': Batch Size + def_gen: Defence + 'model_layers: lambda_': Layers + 'def_value: lambda_': Defence Strength + 'predict_time: lambda_': $t_{predict}$ + partial_effect: + - file: weibull_epochs_partial_effect.eps + covariate_array: model.trainer.nb_epoch + values_array: + - 1 + - 10 + - 25 + - 50 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Epochs + labels: + - '1' + - '10' + - '25' + - '50' + - file: weibull_layers_partial_effect.eps + covariate_array: model_layers + values_array: + - 18 + - 34 + - 50 + - 101 + - 152 + title: $S(t)$ for Weibull AFR + ylabel: Expectation of $S(t)$ + xlabel: Time $T$ (seconds) + legend_kwargs: + title: Layers + labels: + - '18' + - '34' + - '50' + - '101' + - '152' + outs: + - path: mnist/plots/aft_comparison.csv + md5: f59decd89bd1e9a684ea77c280dd4977 + size: 375 + - path: mnist/plots/aft_comparison.tex + md5: 9ba120db593233d5811417ba894d3551 + size: 566 + - path: mnist/plots/log_logistic_aft.eps + md5: b7b0671479b42062562f0c03d8c0975f + size: 43143 + - path: mnist/plots/log_logistic_epochs_partial_effect.eps + md5: 6d4454d35005dfd34dc25de5691b05b3 + size: 45110 + - path: mnist/plots/log_logistic_layers_partial_effect.eps + md5: 04f63bb1af4c6cc182f031b0a6f2f570 + size: 46247 + - path: mnist/plots/log_normal_aft.eps + md5: 3c5895eed8d86f1bb68e50a9e8d705fb + size: 44109 + - path: mnist/plots/log_normal_epochs_partial_effect.eps + md5: e273ac270f3c7a04257edf27250f0451 + size: 45828 + - path: mnist/plots/log_normal_layers_partial_effect.eps + md5: 0bd9b1734275002d5fba203829d9ace6 + size: 46723 + - path: mnist/plots/weibull_aft.eps + md5: 7671058fb0db407eef4a01464e324a5c + size: 41411 + - path: mnist/plots/weibull_epochs_partial_effect.eps + md5: b77d411ad6499a3a77a38486ef67e600 + size: 44468 + - path: mnist/plots/weibull_layers_partial_effect.eps + md5: 90c8f92165a288641f249f866d8c25d0 + size: 45557 plot: cmd: python -m 
deckard.layers.plots --path mnist/plots/ --file mnist/reports/clean_attack.csv -c conf/plots.yaml deps: - path: mnist/reports/clean_attack.csv - hash: md5 - md5: ada4d505a6c72408e50b929b2869ffee - size: 19747128 + md5: 2c19e993bd04189f0f47a38eda17f3b9 + size: 43039659 params: - params.yaml: - files.directory: mnist - files.reports: reports conf/plots.yaml: cat_plot: - - file: adv_accuracy_vs_defence_type.pdf + - file: adv_accuracy_vs_defence_type.eps hue: model_name kind: boxen set: @@ -57,7 +310,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_accuracy_vs_defence_type.pdf + - file: ben_accuracy_vs_defence_type.eps hue: model_name kind: boxen titles: Ben. Accuracy vs Defence Type @@ -72,7 +325,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_failures_per_train_time_vs_defence_type.pdf + - file: ben_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -89,7 +342,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_train_time_vs_defence_type.pdf + - file: adv_failures_per_train_time_vs_defence_type.eps hue: model_name kind: boxen set: @@ -106,7 +359,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_train_time_vs_attack_type.pdf + - file: adv_failures_per_train_time_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -124,31 +377,15 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_failures_per_test_time_vs_defence_type.pdf + - file: adv_failures_per_test_time_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name - titles: $h_{adv}$ vs Defence Type + titles: $f_{adv}$ vs Defence Type x: def_gen xlabels: Defence Type y: adv_failure_rate - ylabels: $h_{adv.}$ - rotation: 90 - hue_order: - - ResNet18 - - ResNet34 - - ResNet50 - - ResNet101 - - ResNet152 - - file: adv_accuracy_vs_defence_type.pdf - hue: model_name - kind: boxen - legend_title: Model Name - titles: Adv. 
Accuracy vs Defence Type - x: def_gen - xlabels: Defence Type - y: adv_accuracy - ylabels: Adv. Ben. Accuracy + ylabels: $f_{adv.}$ rotation: 90 hue_order: - ResNet18 @@ -156,7 +393,7 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: adv_accuracy_vs_attack_type.pdf + - file: adv_accuracy_vs_attack_type.eps hue: model_name kind: boxen legend_title: Model Name @@ -172,17 +409,17 @@ stages: - ResNet50 - ResNet101 - ResNet152 - - file: ben_failure_rate_vs_defence_type.pdf + - file: ben_failure_rate_vs_defence_type.eps hue: model_name kind: boxen legend_title: Model Name set: yscale: log - titles: $h_{ben}(t; \theta)$ vs Defence Type + titles: $f_{ben}(t; \theta)$ vs Defence Type x: def_gen xlabels: Defence Type y: failure_rate - ylabels: $h_{ben}(t; \theta)$ + ylabels: $f_{ben}(t; \theta)$ rotation: 90 hue_order: - ResNet18 @@ -191,7 +428,7 @@ stages: - ResNet101 - ResNet152 line_plot: - - file: def_param_vs_accuracy.pdf + - file: def_param_vs_accuracy.eps hue: def_gen legend: bbox_to_anchor: @@ -205,7 +442,17 @@ stages: y: accuracy y_scale: ylabel: Ben. Accuracy - - file: def_param_vs_adv_accuracy.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_accuracy.eps hue: def_gen legend: bbox_to_anchor: @@ -219,21 +466,41 @@ stages: y: adv_accuracy y_scale: ylabel: Adv. 
Accuracy - - file: def_param_vs_adv_failure_rate.pdf + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: def_param_vs_adv_failure_rate.eps hue: def_gen legend: bbox_to_anchor: - 1.05 - 1 title: Defence - title: $h_{adv}$ vs Defence Strength + title: $f_{adv}$ vs Defence Strength x: def_value x_scale: linear xlabel: Defence Control Parameter y: adv_failure_rate - y_scale: log - ylabel: $h_{adv.}$ - - file: atk_param_vs_accuracy.pdf + y_scale: linear + ylabel: $f_{adv.}$ + hue_order: + - Control + - Conf + - Epochs + - Gauss-in + - Gauss-out + - Conf + - FSQ + errorbar: se + err_style: bars + - file: atk_param_vs_accuracy.eps hue: atk_gen legend: bbox_to_anchor: @@ -253,15 +520,16 @@ stages: - HSJ - Pixel - Thresh + errorbar: se + err_style: bars scatter_plot: - x: train_time_per_sample y: adv_failure_rate hue: model_name xlabel: $t_{train}$ - ylabel: $h_{adv}$ - title: $h_{adv}$ vs $t_{train}$ - file: adv_failure_rate_vs_train_time.pdf - y_scale: log + ylabel: $f_{adv}$ + title: $f_{adv}$ vs $t_{train}$ + file: adv_failure_rate_vs_train_time.eps x_scale: log legend: title: Model Name @@ -275,106 +543,102 @@ stages: - ResNet101 - ResNet152 outs: - - path: mnist/plots/adv_accuracy_vs_attack_type.pdf - hash: md5 - md5: 175188b326b4c519d51274f461fe4b56 - size: 32685 - - path: mnist/plots/adv_accuracy_vs_defence_type.pdf - hash: md5 - md5: 65201d5844ae16efa9e675d3622c6c6d - size: 33020 - - path: mnist/plots/adv_failure_rate_vs_train_time.pdf - hash: md5 - md5: ee56e84db4091efa7b1526ec51fdfdd5 - size: 96055 - - path: mnist/plots/adv_failures_per_test_time_vs_defence_type.pdf - hash: md5 - md5: bdd64c58fc2cfdc344ec81a0642be597 - size: 40000 - - path: mnist/plots/adv_failures_per_train_time_vs_attack_type.pdf - hash: md5 - md5: 1216c3aa5898e874f430c7734b26ea56 - size: 41252 - - path: mnist/plots/adv_failures_per_train_time_vs_defence_type.pdf - hash: md5 - md5: 8f04fa98643a1210910ef8657df1c2fa - 
size: 37353 - - path: mnist/plots/atk_param_vs_accuracy.pdf - hash: md5 - md5: 66c481c894939696b9f217570790fcec - size: 20223 - - path: mnist/plots/ben_accuracy_vs_defence_type.pdf - hash: md5 - md5: ff6f4bb8bde362750bf5878eea7e13ff - size: 30124 - - path: mnist/plots/ben_failure_rate_vs_defence_type.pdf - hash: md5 - md5: 983850e56fcd0ad50d85e630d61c82a5 - size: 42600 - - path: mnist/plots/ben_failures_per_train_time_vs_defence_type.pdf - hash: md5 - md5: e62b68ef46db339eb9def2f4959be2b3 - size: 37589 - - path: mnist/plots/def_param_vs_accuracy.pdf - hash: md5 - md5: e1b0f111bc5d7797256be82187586023 - size: 20548 - - path: mnist/plots/def_param_vs_adv_accuracy.pdf - hash: md5 - md5: 310f01fdaa47a92500f03d728cd6b117 - size: 19862 - - path: mnist/plots/def_param_vs_adv_failure_rate.pdf - hash: md5 - md5: d376c93f824251f3508b0a7e3c365020 - size: 23632 - afr: - cmd: python -m deckard.layers.afr --dataset mnist --data_file mnist/reports/clean_attack.csv --duration_col - adv_fit_time --dataset mnist --config_file conf/afr.yaml --plots_folder mnist/plots/ + - path: mnist/plots/adv_accuracy_vs_attack_type.eps + md5: d51200cf3e8cefd7517a97c507c32a92 + size: 123974 + - path: mnist/plots/adv_accuracy_vs_defence_type.eps + md5: 1142a65c254df12224d88187d580c756 + size: 118866 + - path: mnist/plots/adv_failure_rate_vs_train_time.eps + md5: 584a893a5e386ce8af1b49dd4333f27a + size: 987785 + - path: mnist/plots/adv_failures_per_test_time_vs_defence_type.eps + md5: ae31c6898b60fc8c4b2d5ed523e8648b + size: 138229 + - path: mnist/plots/adv_failures_per_train_time_vs_attack_type.eps + md5: 32d8a97ee6fa8bfb385cd10cef9985a0 + size: 128046 + - path: mnist/plots/adv_failures_per_train_time_vs_defence_type.eps + md5: 191c566d64c2f94b5832578951a1ca3a + size: 122522 + - path: mnist/plots/atk_param_vs_accuracy.eps + md5: 2d20b9e7214b8b04c8d6ba60e59be5a0 + size: 39701 + - path: mnist/plots/ben_accuracy_vs_defence_type.eps + md5: c94bb2af2ed7a26bf6d1f232d010671a + size: 112716 + - path: 
mnist/plots/ben_failure_rate_vs_defence_type.eps + md5: ab115d786b80015aba9efe0b03df9ada + size: 140041 + - path: mnist/plots/ben_failures_per_train_time_vs_defence_type.eps + md5: e2e8499a4b6fc81d50e18d6b4bdf29f7 + size: 123124 + - path: mnist/plots/def_param_vs_accuracy.eps + md5: a37bec164c0ba420ef4c20cab952cd9a + size: 39660 + - path: mnist/plots/def_param_vs_adv_accuracy.eps + md5: 09af779c5fd3c9b132bd5270516a6e53 + size: 39631 + - path: mnist/plots/def_param_vs_adv_failure_rate.eps + md5: 706238ba734e5a65642f12e105365aa9 + size: 39067 + copy_results: + cmd: mkdir -p ~/ml_afr/mnist/ && cp -r mnist/plots/* ~/ml_afr/mnist/ + deps: + - path: mnist/plots/ + md5: 37a3df39d57394928f1d659d522f9544.dir + size: 3021764 + nfiles: 35 + - path: mnist/plots/ + md5: 37a3df39d57394928f1d659d522f9544.dir + size: 3021764 + nfiles: 35 + afr_with_dummies: + cmd: python -m deckard.layers.afr --dataset mnist --data_file mnist/reports/clean_attack.csv --target + adv_accuracy --duration_col predict_time --dataset mnist --config_file conf/afr_dummy.yaml + --plots_folder mnist/plots/ --summary_file aft_comparison_with_dummy deps: - - path: mnist/plots/adv_accuracy_vs_defence_type.pdf - hash: md5 - md5: 65201d5844ae16efa9e675d3622c6c6d - size: 33020 - path: mnist/reports/clean_attack.csv - hash: md5 - md5: ada4d505a6c72408e50b929b2869ffee - size: 19747128 + md5: 2c19e993bd04189f0f47a38eda17f3b9 + size: 43039659 params: params.yaml: files.directory: mnist conf/afr.yaml: covariates: + - adv_fit_time - accuracy - train_time - - predict_time - atk_value - def_value - data.sample.random_state - - adv_failure_rate - - model_layers - - adv_fit_time - model.trainer.nb_epoch + - model_layers + - predict_time log_logistic: plot: - file: log_logistic_aft.pdf + file: log_logistic_aft.eps title: Log logistic AFR Model labels: - 'Intercept: rho_': $\rho$ - 'Intercept: lambda_': $\lambda$ - 'data.sample.random_state: lambda_': Random State - 'atk_value: lambda_': Attack Strength - 'train_time: 
lambda_': $t_{train}$ - 'predict_proba_time: lambda_': $t_{predict}$ - 'adv_accuracy: lambda_': Adv. Accuracy - 'accuracy: lambda_': Ben. Accuracy - 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ - 'model.trainer.nb_epoch: lambda_': No. of Epochs - 'model.trainer.batch_size: lambda_': Batch Size + 'Intercept: beta_': $\beta$ + 'Intercept: alpha_': $\alpha$ + 'data.sample.random_state: alpha_': Random State + 'atk_value: alpha_': Attack Strength + 'train_time: alpha_': $t_{train}$ + 'predict_proba_time: alpha_': $t_{predict}$ + 'adv_accuracy: alpha_': Adv. Accuracy + 'accuracy: alpha_': Ben. Accuracy + 'adv_fit_time: alpha_': $t_{attack}$ + 'adv_failure_rate: alpha_': $f_{adv.}(t;\theta)$ + 'failure_rate: alpha_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: alpha_': No. of Epochs + 'model.trainer.batch_size: alpha_': Batch Size def_gen: Defence + 'model_layers: alpha_': ResNet Layers + 'def_value: alpha_': Defence Strength + 'predict_time: alpha_': $t_{predict}$ partial_effect: - - file: log_logistic_epochs_partial_effect.pdf + - file: log_logistic_epochs_partial_effect.eps covariate_array: model.trainer.nb_epoch values_array: - 1 @@ -391,7 +655,7 @@ stages: - '10' - '25' - '50' - - file: log_logistic_layers_partial_effect.pdf + - file: log_logistic_layers_partial_effect.eps covariate_array: model_layers values_array: - 18 @@ -399,7 +663,7 @@ stages: - 50 - 101 - 152 - title: $S(t)$ for Cox AFR + title: $S(t)$ for Log Logistic AFR ylabel: Expectation of $S(t)$ xlabel: Time $T$ (seconds) legend_kwargs: @@ -412,25 +676,28 @@ stages: - '152' log_normal: plot: - file: log_normal_aft.pdf + file: log_normal_aft.eps title: Log Normal AFR Model labels: - 'Intercept: rho_': $\rho$ - 'Intercept: lambda_': $\lambda$ - 'data.sample.random_state: lambda_': Random State - 'atk_value: lambda_': Attack Strength - 'train_time: lambda_': $t_{train}$ - 'predict_proba_time: lambda_': 
$t_{predict}$ - 'adv_accuracy: lambda_': Adv. Accuracy - 'accuracy: lambda_': Ben. Accuracy - 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ - 'model.trainer.nb_epoch: lambda_': No. of Epochs - 'model.trainer.batch_size: lambda_': Batch Size + 'Intercept: sigma_': $\rho$ + 'Intercept: mu_': $\mu$ + 'data.sample.random_state: mu_': Random State + 'atk_value: mu_': Attack Strength + 'train_time: mu_': $t_{train}$ + 'predict_proba_time: mu_': $t_{predict}$ + 'adv_accuracy: mu_': Adv. Accuracy + 'accuracy: mu_': Ben. Accuracy + 'adv_fit_time: mu_': $t_{attack}$ + 'adv_failure_rate: mu_': $f_{adv.}(t;\theta)$ + 'failure_rate: mu_': $f_{ben.}(t;\theta)$ + 'model.trainer.nb_epoch: mu_': No. of Epochs + 'model.trainer.batch_size: mu_': Batch Size def_gen: Defence + 'model_layers: mu_': ResNet Layers + 'def_value: mu_': Defence Strength + 'predict_time: mu_': $t_{predict}$ partial_effect: - - file: log_normal_epochs_partial_effect.pdf + - file: log_normal_epochs_partial_effect.eps covariate_array: model.trainer.nb_epoch values_array: - 1 @@ -447,7 +714,7 @@ stages: - '10' - '25' - '50' - - file: log_normal_layers_partial_effect.pdf + - file: log_normal_layers_partial_effect.eps covariate_array: model_layers values_array: - 18 @@ -455,7 +722,7 @@ stages: - 50 - 101 - 152 - title: $S(t)$ for Cox AFR + title: $S(t)$ for Log Normal AFR ylabel: Expectation of $S(t)$ xlabel: Time $T$ (seconds) legend_kwargs: @@ -468,7 +735,7 @@ stages: - '152' weibull: plot: - file: weibull_aft.pdf + file: weibull_aft.eps title: Weibull AFR Model labels: 'Intercept: rho_': $\rho$ @@ -480,13 +747,16 @@ stages: 'adv_accuracy: lambda_': Adv. Accuracy 'accuracy: lambda_': Ben. 
Accuracy 'adv_fit_time: lambda_': $t_{attack}$ - 'adv_failure_rate: lambda_': $h_{adv.}(t;\theta)$ - 'failure_rate: lambda_': $h_{ben.}(t;\theta)$ + 'adv_failure_rate: lambda_': $f_{adv.}(t;\theta)$ + 'failure_rate: lambda_': $f_{ben.}(t;\theta)$ 'model.trainer.nb_epoch: lambda_': No. of Epochs 'model.trainer.batch_size: lambda_': Batch Size def_gen: Defence + 'model_layers: lambda_': ResNet Layers + 'def_value: lambda_': Defence Strength + 'predict_time: lambda_': $t_{predict}$ partial_effect: - - file: weibull_epochs_partial_effect.pdf + - file: weibull_epochs_partial_effect.eps covariate_array: model.trainer.nb_epoch values_array: - 1 @@ -503,7 +773,7 @@ stages: - '10' - '25' - '50' - - file: weibull_layers_partial_effect.pdf + - file: weibull_layers_partial_effect.eps covariate_array: model_layers values_array: - 18 @@ -511,7 +781,7 @@ stages: - 50 - 101 - 152 - title: $S(t)$ for Cox AFR + title: $S(t)$ for Weibull AFR ylabel: Expectation of $S(t)$ xlabel: Time $T$ (seconds) legend_kwargs: @@ -523,224 +793,36 @@ stages: - '101' - '152' outs: - - path: mnist/plots/aft_comparison.csv - hash: md5 - md5: a35241fbedbd3900b42424e4036f1cdb - size: 183 - - path: mnist/plots/aft_comparison.tex - hash: md5 - md5: c0d2c64f3b2696750aa5de1539312379 - size: 407 - - path: mnist/plots/log_logistic_aft.pdf - hash: md5 - md5: 03f05b16a9504eea7e81652b31de0d00 - size: 23401 - - path: mnist/plots/log_logistic_epochs_partial_effect.pdf - hash: md5 - md5: 535a9ea4255b7577cb300efbd1d256df - size: 27394 - - path: mnist/plots/log_logistic_layers_partial_effect.pdf - hash: md5 - md5: 2a4ab1809b665e226617185e475fb76b - size: 28988 - - path: mnist/plots/log_normal_aft.pdf - hash: md5 - md5: 007fd6530b124cb76995f2d11ccea52f - size: 24471 - - path: mnist/plots/log_normal_epochs_partial_effect.pdf - hash: md5 - md5: 29293af9fa83d8c9a6ca3680183c53c5 - size: 28043 - - path: mnist/plots/log_normal_layers_partial_effect.pdf - hash: md5 - md5: 1848f13abd7b100cb455de45f4fd13fd - size: 29095 - - 
path: mnist/plots/weibull_aft.pdf - hash: md5 - md5: 0e5d3b23fa0607678359c96c978c0952 - size: 33691 - - path: mnist/plots/weibull_epochs_partial_effect.pdf - hash: md5 - md5: c48ae038ac53ec5844f72eda45812146 - size: 27547 - - path: mnist/plots/weibull_layers_partial_effect.pdf - hash: md5 - md5: e7c2e1ec85fdb54e59bfbe55a9065c5a - size: 29116 - copy_results: - cmd: cp -r mnist/plots/* ~/ml_afr/mnist/ - deps: - - path: mnist/plots/ - hash: md5 - md5: 7baeb7db4aacc8c9e9676f7d04713294.dir - size: 727279 - nfiles: 24 - clean@attack: - cmd: python -m deckard.layers.clean_data -i mnist/reports/attack.csv -o mnist/reports/clean_attack.csv - -c conf/clean.yaml - deps: - - path: mnist/reports/attack.csv - hash: md5 - md5: aa61d0f5118327d164cfc37a466e5162 - size: 47376297 - params: - params.yaml: - files.directory: mnist - files.reports: reports - conf/clean.yaml: - attacks: - DeepFool: Deep - FastGradientMethod: FGM - HopSkipJump: HSJ - PixelAttack: Pixel - ProjectedGradientDescent: PGD - ThresholdAttack: Thresh - defences: - Control: Control - FeatureSqueezing: FSQ - GaussianAugmentation: Gauss-in - GaussianNoise: Gauss-out - HighConfidence: Conf - nb_epoch: Epochs - model_layers: Control - fillna: - Epochs: 20 - params: - Deep: attack.init.kwargs.nb_grads - FGM: attack.init.kwargs.eps - HSJ: attack.init.kwargs.max_iter - Pixel: attack.init.kwargs.th - PGD: attack.init.kwargs.eps - Thresh: attack.init.kwargs.th - Gauss-out: model.art.pipeline.postprocessor.kwargs.scale - Conf: model.art.pipeline.postprocessor.kwargs.cutoff - FSQ: model.art.pipeline.preprocessor.kwargs.bit_depth - Gauss-in: model.art.pipeline.preprocessor.kwargs.sigma - Control: model_layers - Epochs: model.trainer.nb_epoch - outs: - - path: mnist/reports/clean_attack.csv - hash: md5 - md5: ada4d505a6c72408e50b929b2869ffee - size: 19747128 - train: - cmd: python -m deckard.layers.experiment train --config_file mnist.yaml - params: - params.yaml: - data: - _target_: deckard.base.data.Data - generate: - 
_target_: deckard.base.data.generator.DataGenerator - name: torch_mnist - sample: - _target_: deckard.base.data.sampler.SklearnDataSampler - random_state: 0 - stratify: true - sklearn_pipeline: - _target_: deckard.base.data.sklearn_pipeline.SklearnDataPipeline - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - files: - _target_: deckard.base.files.FileConfig - adv_predictions_file: adv_predictions.json - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: mnist - model_dir: models - model_file: model - model_type: .pt - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - data: - _target_: deckard.base.data.Data - generate: - _target_: deckard.base.data.generator.DataGenerator - name: torch_mnist - sample: - _target_: deckard.base.data.sampler.SklearnDataSampler - random_state: 0 - stratify: true - sklearn_pipeline: - _target_: deckard.base.data.sklearn_pipeline.SklearnDataPipeline - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - initialize: - clip_values: - - 0 - - 255 - criterion: - name: torch.nn.CrossEntropyLoss - optimizer: - lr: 0.01 - momentum: 0.9 - name: torch.optim.SGD - library: pytorch - data: - _target_: deckard.base.data.Data - generate: - _target_: deckard.base.data.generator.DataGenerator - name: torch_mnist - sample: - _target_: deckard.base.data.sampler.SklearnDataSampler - random_state: 0 - stratify: true - sklearn_pipeline: - _target_: deckard.base.data.sklearn_pipeline.SklearnDataPipeline - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - init: - _target_: deckard.base.model.ModelInitializer - name: torch_example.ResNet18 - num_channels: 1 - 
library: pytorch - trainer: - batch_size: 1024 - nb_epoch: 100 - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: mnist/data/data.pkl - hash: md5 - md5: de934a5f5157970e5f30b8f3f1856a68 - size: 222320311 - - path: mnist/models/model.optimizer.pt - hash: md5 - md5: 1e527d70896a4a05a2d6ac103382cd50 - size: 44780845 - - path: mnist/models/model.pt - hash: md5 - md5: f01e051c7b7dfa20eca3fe1caab0b25e - size: 44785941 - - path: mnist/reports/train/default/predictions.json - hash: md5 - md5: 1e2cf0100bb5f0a42182021e12b00dd9 - size: 2882749 - - path: mnist/reports/train/default/score_dict.json - hash: md5 - md5: ebe552d99842320709ca466da6d3092c - size: 410 + - path: mnist/plots/aft_comparison_with_dummy.csv + md5: 00cfc38147e5ffed62c71eef81d76604 + size: 382 + - path: mnist/plots/aft_comparison_with_dummy.tex + md5: a7b04f60efcb56609a0374f4a614b322 + size: 566 + - path: mnist/plots/log_logistic_aft_dummy.eps + md5: 6c3c08024c4464c7069cc1ba0b0b2a29 + size: 62987 + - path: mnist/plots/log_logistic_epochs_partial_effect_dummy.eps + md5: 8c83fe60085369058bce8c4860a82e96 + size: 43270 + - path: mnist/plots/log_logistic_layers_partial_effect_dummy.eps + md5: df5b4c525afc9799f66348f89e4c0f11 + size: 48378 + - path: mnist/plots/log_normal_aft_dummy.eps + md5: 9792419c39453877f3a37361b5605ca0 + size: 61512 + - path: mnist/plots/log_normal_epochs_partial_effect_dummy.eps + md5: e55b95768eeaf862ba16d23e3ca17c53 + size: 45738 + - path: mnist/plots/log_normal_layers_partial_effect_dummy.eps + md5: 91d32fe10a4c39014c2b310388a34e1f + size: 48808 + - path: mnist/plots/weibull_aft_dummy.eps + md5: 75ca056699d7c578d7492d6dcacf9e57 + size: 60898 + - path: mnist/plots/weibull_epochs_partial_effect_dummy.eps + md5: d93f5cd06f362504b56215a89783990f 
+ size: 44530 + - path: mnist/plots/weibull_layers_partial_effect_dummy.eps + md5: 0434dbd9406305ab7551e9ad8ce6bdc6 + size: 47796 diff --git a/examples/pytorch/mnist/dvc.yaml b/examples/pytorch/mnist/dvc.yaml index 851dc5fc..2baf8db6 100644 --- a/examples/pytorch/mnist/dvc.yaml +++ b/examples/pytorch/mnist/dvc.yaml @@ -128,34 +128,30 @@ stages: - ${files.directory}/plots/${cat_plot[5].file} - ${files.directory}/plots/${cat_plot[6].file} - ${files.directory}/plots/${cat_plot[7].file} - - ${files.directory}/plots/${cat_plot[8].file} - ${files.directory}/plots/${line_plot[0].file} - ${files.directory}/plots/${line_plot[1].file} - ${files.directory}/plots/${line_plot[2].file} - ${files.directory}/plots/${line_plot[3].file} - ${files.directory}/plots/${scatter_plot[0].file} params: - - files.directory - - files.reports - conf/plots.yaml: - line_plot - scatter_plot - cat_plot afr: - cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/${files.reports}/clean_attack.csv --duration_col adv_fit_time --dataset mnist --config_file conf/afr.yaml --plots_folder ${files.directory}/plots/ + cmd: python -m deckard.layers.afr --dataset ${files.directory} --data_file ${files.directory}/${files.reports}/clean_attack.csv --target adv_accuracy --duration_col predict_time --dataset mnist --config_file conf/afr.yaml --plots_folder ${files.directory}/plots/ deps: - ${files.directory}/${files.reports}/clean_attack.csv - - ${files.directory}/plots/${cat_plot[0].file} plots: - - ${files.directory}/plots/weibull_aft.pdf - - ${files.directory}/plots/weibull_epochs_partial_effect.pdf - - ${files.directory}/plots/weibull_layers_partial_effect.pdf - - ${files.directory}/plots/log_logistic_aft.pdf - - ${files.directory}/plots/log_logistic_epochs_partial_effect.pdf - - ${files.directory}/plots/log_logistic_layers_partial_effect.pdf - - ${files.directory}/plots/log_normal_aft.pdf - - ${files.directory}/plots/log_normal_epochs_partial_effect.pdf - - 
${files.directory}/plots/log_normal_layers_partial_effect.pdf + - ${files.directory}/plots/weibull_aft.eps + - ${files.directory}/plots/weibull_epochs_partial_effect.eps + - ${files.directory}/plots/weibull_layers_partial_effect.eps + - ${files.directory}/plots/log_logistic_aft.eps + - ${files.directory}/plots/log_logistic_epochs_partial_effect.eps + - ${files.directory}/plots/log_logistic_layers_partial_effect.eps + - ${files.directory}/plots/log_normal_aft.eps + - ${files.directory}/plots/log_normal_epochs_partial_effect.eps + - ${files.directory}/plots/log_normal_layers_partial_effect.eps metrics: - ${files.directory}/plots/aft_comparison.csv outs: @@ -168,6 +164,7 @@ stages: - log_logistic - log_normal copy_results: - cmd: cp -r ${files.directory}/plots/* ~/ml_afr/mnist/ + cmd: mkdir -p ~/ml_afr/mnist/ && cp -r ${files.directory}/plots/* ~/ml_afr/mnist/ deps: - ${files.directory}/plots/ + - ${files.directory}/plots/ diff --git a/examples/pytorch/tuned.ipynb b/examples/pytorch/tuned.ipynb new file mode 100644 index 00000000..9ed15d18 --- /dev/null +++ b/examples/pytorch/tuned.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "df = pd.read_csv(\"cifar100/cifar100/reports/clean_attack.csv\", index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# ! cd ../../ && python -m pip install -e ." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(718, 233)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from deckard.layers.clean_data import pareto_set\n", + "\n", + "sense_dict = {\n", + " \"accuracy\": \"max\",\n", + " \"adv_accuracy\": \"max\",\n", + " # \"atk_gen\": \"diff\",\n", + " \"def_value\": \"diff\",\n", + " \"def_gen\": \"diff\",\n", + " \"data.sample.random_state\": \"diff\",\n", + " # \"train_time\" : \"min\",\n", + " # \"predict_time\" : \"min\",\n", + " # \"model_layers\" : \"diff\",\n", + "}\n", + "pareto = pareto_set(df, sense_dict)\n", + "pareto.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.lineplot(\n", + " data=pareto,\n", + " x=\"def_value\",\n", + " y=\"accuracy\",\n", + " hue=\"def_gen\",\n", + " errorbar=\"se\",\n", + " err_style=\"bars\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "survey", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pytorch/weibull.ipynb b/examples/pytorch/weibull.ipynb deleted file mode 100644 index def6e2d1..00000000 --- a/examples/pytorch/weibull.ipynb +++ /dev/null @@ -1,2062 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from pathlib import Path\n", - "import seaborn as sns\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "\n", - "from paretoset import paretoset\n", - "from lifelines import (\n", - " WeibullAFTFitter,\n", - " LogNormalAFTFitter,\n", - " LogLogisticAFTFitter,\n", - " CoxPHFitter,\n", - ")\n", - "from plots import calculate_failure_rate, drop_frames_without_results, min_max_scaling\n", - "import matplotlib\n", - "import argparse\n", - "from pathlib import Path\n", - "import logging\n", - "\n", - "logger = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "font = {\"family\": \"Times New Roman\", 
\"weight\": \"bold\", \"size\": 22}\n", - "\n", - "matplotlib.rc(\"font\", **font)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Adversarial Accuracy: \n", - " ResNet152: 0.08431102362204726 \n", - " Resnet101: 0.08595785440613028 \n", - " Resnet50: 0.09093333333333335 \n", - " Resnet34: 0.08867549668874172 \n", - " Resnet18: 0.07971698113207548 \n", - "\n" - ] - } - ], - "source": [ - "FOLDER = Path(\"output/plots/\")\n", - "csv_file = FOLDER / \"data.csv\"\n", - "data = pd.read_csv(csv_file, index_col=0)\n", - "data.columns = data.columns.str.strip()\n", - "data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n", - "data.def_value.replace(\"\", 0, inplace=True)\n", - "data.atk_value.replace(\"\", 0, inplace=True)\n", - "data = drop_frames_without_results(data)\n", - "data = calculate_failure_rate(data)\n", - "data = min_max_scaling(data)\n", - "data.dropna(axis=0, subset=[\"atk_value\", \"atk_param\"], inplace=True)\n", - "data.dropna(axis=0, subset=[\"def_value\", \"def_param\"], inplace=True)\n", - "# data=data[data['def_gen'] == 'Gauss-in']\n", - "# data=data[data['atk_gen'] == 'HSJ']\n", - "\n", - "print(\n", - " \"Adversarial Accuracy:\",\n", - " \"\\n\",\n", - " \"ResNet152:\",\n", - " data[data[\"model_layers\"] == 152].adv_accuracy.mean(skipna=True),\n", - " \"\\n\",\n", - " \"Resnet101:\",\n", - " data[data[\"model_layers\"] == 101].adv_accuracy.mean(skipna=True),\n", - " \"\\n\",\n", - " \"Resnet50:\",\n", - " data[data[\"model_layers\"] == 50].adv_accuracy.mean(skipna=True),\n", - " \"\\n\",\n", - " \"Resnet34:\",\n", - " data[data[\"model_layers\"] == 34].adv_accuracy.mean(skipna=True),\n", - " \"\\n\",\n", - " \"Resnet18:\",\n", - " data[data[\"model_layers\"] == 18].adv_accuracy.mean(skipna=True),\n", - " \"\\n\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - 
"source": [ - "def plot_aft(\n", - " df,\n", - " file,\n", - " event_col,\n", - " duration_col,\n", - " title,\n", - " mtype,\n", - " xlabel=None,\n", - " ylabel=None,\n", - " replacement_dict={},\n", - " **kwargs,\n", - "):\n", - " if mtype == \"weibull\":\n", - " aft = WeibullAFTFitter(**kwargs)\n", - " elif mtype == \"log_normal\":\n", - " aft = LogNormalAFTFitter(**kwargs)\n", - " elif mtype == \"log_logistic\":\n", - " aft = LogLogisticAFTFitter(**kwargs)\n", - " elif mtype == \"cox\":\n", - " aft = CoxPHFitter(**kwargs)\n", - " assert (\n", - " duration_col in df.columns\n", - " ), f\"Column {duration_col} not in dataframe with columns {df.columns}\"\n", - " if event_col is not None:\n", - " assert (\n", - " event_col in df.columns\n", - " ), f\"Column {event_col} not in dataframe with columns {df.columns}\"\n", - " aft.fit(df, duration_col=duration_col, event_col=event_col)\n", - " ax = aft.plot()\n", - " labels = ax.get_yticklabels()\n", - " labels = [label.get_text() for label in labels]\n", - " for k, v in replacement_dict.items():\n", - " labels = [label.replace(k, v) for label in labels]\n", - " ax.set_yticklabels(labels)\n", - " ax.set_xlabel(xlabel)\n", - " ax.set_ylabel(ylabel)\n", - " ax.set_title(title)\n", - " ax.get_figure().tight_layout()\n", - " ax.get_figure().savefig(FOLDER / file)\n", - " logger.info(f\"Saved graph to {FOLDER / file}\")\n", - " plt.show()\n", - " return ax, aft\n", - "\n", - "\n", - "def plot_partial_effects(\n", - " file,\n", - " aft,\n", - " covariate_array,\n", - " values_array,\n", - " title,\n", - " xlabel=\"Covariate\",\n", - " ylabel=\"Failure rate\",\n", - " legend_kwargs={\"loc\": \"upper left\"},\n", - " replacement_dict={},\n", - " cmap=\"coolwarm\",\n", - " **kwargs,\n", - "):\n", - " plt.gcf().clear()\n", - " # kwargs.pop(\"replacement_dict\")\n", - " pareto = aft.plot_partial_effects_on_outcome(\n", - " covariate_array, values_array, cmap=cmap, **kwargs\n", - " )\n", - " labels = pareto.get_yticklabels()\n", - 
" labels = [label.get_text() for label in labels]\n", - " for k, v in replacement_dict.items():\n", - " labels = [label.replace(k, v) for label in labels]\n", - " pareto.set_yticklabels(labels)\n", - " pareto.legend(**legend_kwargs)\n", - " pareto.set_ylabel(ylabel)\n", - " pareto.set_xlabel(xlabel)\n", - " pareto.set_title(title)\n", - " pareto.get_figure().tight_layout()\n", - " pareto.get_figure().savefig(FOLDER / file)\n", - " logger.info(f\"Saved graph to {FOLDER / file}\")\n", - " return pareto\n", - "\n", - "\n", - "def score_model(aft, train, test):\n", - " train_score = aft.score(train)\n", - " test_score = aft.score(test)\n", - " scores = {\"train_score\": train_score, \"test_score\": test_score}\n", - " plt.show()\n", - " return scores\n", - "\n", - "\n", - "def clean_data_for_aft(data, kwarg_list, target=\"adv_failure_rate\"):\n", - " subset = data.copy()\n", - " assert (\n", - " target in subset\n", - " ), f\"Target {target} not in dataframe with columns {subset.columns}\"\n", - "\n", - " cleaned = pd.DataFrame()\n", - " kwarg_list.append(target)\n", - " for kwarg in kwarg_list:\n", - " cleaned = pd.concat([cleaned, subset[kwarg]], axis=1)\n", - " cols = cleaned.columns\n", - " cleaned = pd.DataFrame(subset, columns=cols)\n", - "\n", - " # if \"accuracy\" in cleaned.columns:\n", - " # cleaned = cleaned[~cleaned[cleaned['accuracy'] != 1e10]]\n", - " # cleaned = cleaned[~cleaned[cleaned['accuracy'] != -1e10]]\n", - " # if \"adv_accuracy\" in cleaned.columns:\n", - " # cleaned = cleaned[cleaned[cleaned['adv_accuracy'] != 1e10]]\n", - " # cleaned = cleaned[cleaned[cleaned['adv_accuracy'] != -1e10]]\n", - " cleaned.dropna(inplace=True, how=\"any\", axis=0)\n", - " y = cleaned[target]\n", - " assert (\n", - " target in cleaned\n", - " ), f\"Target {target} not in dataframe with columns {cleaned.columns}\"\n", - " return cleaned, y, data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "kwarg_list 
= [\n", - " # \"accuracy\",\n", - " \"train_time\",\n", - " \"predict_time\",\n", - " \"atk_value\",\n", - " \"def_value\",\n", - " \"data.sample.random_state\",\n", - " \"adv_failure_rate\",\n", - " # \"failure_rate\",\n", - " \"model_layers\",\n", - " \"adv_fit_time\",\n", - " # \"atk_param\",\n", - " # \"def_param\",\n", - " \"model.art.pipeline.initialize.kwargs.optimizer.lr\",\n", - " # \"def_gen\",\n", - " # \"atk_gen\",\n", - " # \"adv_log_loss\",\n", - " # \"adv_accuracy\",\n", - " # \"adv_accuracy\",\n", - "]\n", - "\n", - "\n", - "# cleaned['accuracy'] = y" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "data.loc[:, \"adv_failures\"] = 1 - data.loc[:, \"adv_accuracy\"]\n", - "data.loc[:, \"ben_failures\"] = 1 - data.loc[:, \"accuracy\"]\n", - "target = \"adv_failures\"\n", - "duration_col = \"adv_fit_time\"\n", - "cleaned, y, data = clean_data_for_aft(data, kwarg_list, target=target)\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " cleaned, y, test_size=0.2, random_state=42\n", - ")\n", - "assert (\n", - " target in cleaned\n", - "), f\"Target {target} not in dataframe with columns {cleaned.columns}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# from sklearn.preprocessing import PowerTransformer\n", - "# pt = PowerTransformer(method='yeo-johnson', standardize=False)\n", - "# del X_train[target]\n", - "# del X_test[target]\n", - "# X_train_cols = X_train.columns\n", - "# X_train = pt.fit(X_train).transform(X_train)\n", - "# X_test = pt.transform(X_test)\n", - "# X_train = pd.DataFrame(X_train, columns=X_train_cols)\n", - "# X_test = pd.DataFrame(X_test, columns=X_train_cols)\n", - "# X_train[target] = y_train\n", - "# y_train = X_train[target]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# sense_dict ={\n", - "# \"accuracy\" : \"max\",\n", 
- "# \"train_time\" : \"min\",\n", - "# \"predict_time\" : \"min\",\n", - "# # \"atk_value\" : \"diff\",\n", - "# # \"def_value\" : \"diff\",\n", - "# \"data.sample.random_state\" : \"diff\",\n", - "# \"adv_accuracy\" : \"min\",\n", - "# \"model_layers\" : \"diff\",\n", - "# # \"adv_fit_time\" : \"min\",\n", - "# # \"atk_param\" : \"diff\",\n", - "# # \"def_param\" : \"diff\",\n", - "# \"model.art.pipeline.initialize.kwargs.optimizer.lr\" : \"diff\",\n", - "# # \"adv_failure_rate\" : \"maximize\",\n", - "# }\n", - "# subset = X_train.loc[:, sense_dict.keys()]\n", - "# senses = sense_dict.values()\n", - "# these = paretoset(subset)\n", - "# X_train = X_train.iloc[these, :]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modellifelines.WeibullAFTFitter
duration col'adv_fit_time'
event col'adv_failures'
number of observations1500
number of events observed1500
log-likelihood-5531.26
time fit was run2023-09-29 11:13:25 UTC
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
coefexp(coef)se(coef)coef lower 95%coef upper 95%exp(coef) lower 95%exp(coef) upper 95%cmp tozp-log2(p)
lambda_adv_failure_rate-0.001.000.00-0.00-0.001.001.000.00-39.62<0.005inf
atk_value0.151.160.16-0.160.460.851.580.000.950.341.56
data.sample.random_state0.031.030.02-0.010.070.991.070.001.310.192.39
def_value-0.210.810.16-0.530.100.591.110.00-1.310.192.41
model.art.pipeline.initialize.kwargs.optimizer.lr-0.001.000.00-0.000.001.001.000.00-0.530.600.74
model_layers0.011.010.000.010.011.011.010.007.04<0.00538.88
predict_time-0.150.860.01-0.17-0.120.840.880.00-12.17<0.005110.86
train_time0.001.000.000.000.001.001.000.0010.81<0.00588.14
Intercept3.0020.180.182.653.3614.1428.790.0016.56<0.005202.20
rho_Intercept-0.840.430.02-0.88-0.800.410.450.00-43.75<0.005inf

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Concordance0.84
AIC11082.52
log-likelihood ratio test800.84 on 8 df
-log2(p) of ll-ratio test554.32
\n", - "
" - ], - "text/latex": [ - "\\begin{tabular}{llrrrrrrrrrrr}\n", - " & & coef & exp(coef) & se(coef) & coef lower 95% & coef upper 95% & exp(coef) lower 95% & exp(coef) upper 95% & cmp to & z & p & -log2(p) \\\\\n", - "param & covariate & & & & & & & & & & & \\\\\n", - "\\multirow[c]{9}{*}{lambda_} & adv_failure_rate & -0.00 & 1.00 & 0.00 & -0.00 & -0.00 & 1.00 & 1.00 & 0.00 & -39.62 & 0.00 & inf \\\\\n", - " & atk_value & 0.15 & 1.16 & 0.16 & -0.16 & 0.46 & 0.85 & 1.58 & 0.00 & 0.95 & 0.34 & 1.56 \\\\\n", - " & data.sample.random_state & 0.03 & 1.03 & 0.02 & -0.01 & 0.07 & 0.99 & 1.07 & 0.00 & 1.31 & 0.19 & 2.39 \\\\\n", - " & def_value & -0.21 & 0.81 & 0.16 & -0.53 & 0.10 & 0.59 & 1.11 & 0.00 & -1.31 & 0.19 & 2.41 \\\\\n", - " & model.art.pipeline.initialize.kwargs.optimizer.lr & -0.00 & 1.00 & 0.00 & -0.00 & 0.00 & 1.00 & 1.00 & 0.00 & -0.53 & 0.60 & 0.74 \\\\\n", - " & model_layers & 0.01 & 1.01 & 0.00 & 0.01 & 0.01 & 1.01 & 1.01 & 0.00 & 7.04 & 0.00 & 38.88 \\\\\n", - " & predict_time & -0.15 & 0.86 & 0.01 & -0.17 & -0.12 & 0.84 & 0.88 & 0.00 & -12.17 & 0.00 & 110.86 \\\\\n", - " & train_time & 0.00 & 1.00 & 0.00 & 0.00 & 0.00 & 1.00 & 1.00 & 0.00 & 10.81 & 0.00 & 88.14 \\\\\n", - " & Intercept & 3.00 & 20.18 & 0.18 & 2.65 & 3.36 & 14.14 & 28.79 & 0.00 & 16.56 & 0.00 & 202.20 \\\\\n", - "rho_ & Intercept & -0.84 & 0.43 & 0.02 & -0.88 & -0.80 & 0.41 & 0.45 & 0.00 & -43.75 & 0.00 & inf \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\n", - " duration col = 'adv_fit_time'\n", - " event col = 'adv_failures'\n", - " number of observations = 1500\n", - "number of events observed = 1500\n", - " log-likelihood = -5531.26\n", - " time fit was run = 2023-09-29 11:13:25 UTC\n", - "\n", - "---\n", - " coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%\n", - "param covariate \n", - "lambda_ adv_failure_rate -0.00 1.00 0.00 -0.00 -0.00 1.00 1.00\n", - " atk_value 0.15 1.16 0.16 -0.16 0.46 0.85 1.58\n", - " 
data.sample.random_state 0.03 1.03 0.02 -0.01 0.07 0.99 1.07\n", - " def_value -0.21 0.81 0.16 -0.53 0.10 0.59 1.11\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr -0.00 1.00 0.00 -0.00 0.00 1.00 1.00\n", - " model_layers 0.01 1.01 0.00 0.01 0.01 1.01 1.01\n", - " predict_time -0.15 0.86 0.01 -0.17 -0.12 0.84 0.88\n", - " train_time 0.00 1.00 0.00 0.00 0.00 1.00 1.00\n", - " Intercept 3.00 20.18 0.18 2.65 3.36 14.14 28.79\n", - "rho_ Intercept -0.84 0.43 0.02 -0.88 -0.80 0.41 0.45\n", - "\n", - " cmp to z p -log2(p)\n", - "param covariate \n", - "lambda_ adv_failure_rate 0.00 -39.62 <0.005 inf\n", - " atk_value 0.00 0.95 0.34 1.56\n", - " data.sample.random_state 0.00 1.31 0.19 2.39\n", - " def_value 0.00 -1.31 0.19 2.41\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr 0.00 -0.53 0.60 0.74\n", - " model_layers 0.00 7.04 <0.005 38.88\n", - " predict_time 0.00 -12.17 <0.005 110.86\n", - " train_time 0.00 10.81 <0.005 88.14\n", - " Intercept 0.00 16.56 <0.005 202.20\n", - "rho_ Intercept 0.00 -43.75 <0.005 inf\n", - "---\n", - "Concordance = 0.84\n", - "AIC = 11082.52\n", - "log-likelihood ratio test = 800.84 on 8 df\n", - "-log2(p) of ll-ratio test = 554.32" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "weibull_dict = {\n", - " \"Intercept: rho_\": \"$\\\\rho$\",\n", - " \"Intercept: lambda_\": \"$\\lambda$\",\n", - " \"data.sample.random_state: lambda_\": \"Random State\",\n", - " \"def_value: lambda_\": \"Defence Strength\",\n", - " \"atk_value: lambda_\": \"Attack Strength\",\n", - " \"train_time: lambda_\": \"Training Time\",\n", - " \"predict_time: lambda_\": \"Inference Time\",\n", - " \"adv_accuracy: lambda_\": \"Adv. Accuracy\",\n", - " \"accuracy: lambda_\": \"Ben. Accuracy\",\n", - " \"adv_fit_time: lambda_\": \"Adv. Fit Time\",\n", - " \"adv_log_loss: lambda_\": \"Adv. Log Loss\",\n", - " \"adv_failure_rate: lambda_\": \"Adv. Failure Rate\",\n", - " \"failure_rate: lambda_\": \"Ben. 
Failure Rate\",\n", - " \"model_layers: lambda_\": \"No. of Layers\",\n", - " \"model.art.pipeline.initialize.kwargs.optimizer.lr: lambda_\": \"Learning Rate\",\n", - " \"def_gen\": \"Defence\",\n", - "}\n", - "\n", - "weibull_afr, wft = plot_aft(\n", - " X_train,\n", - " file=\"weibull_aft.pdf\",\n", - " event_col=target,\n", - " duration_col=duration_col,\n", - " title=\"Weibull AFR Model\",\n", - " mtype=\"weibull\",\n", - " replacement_dict=weibull_dict,\n", - ")\n", - "wft.print_summary()\n", - "wft_scores = score_model(wft, X_train, X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_773113/12050270.py:64: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " pareto.set_yticklabels(labels)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pareto_dict = {\n", - " \"model_layers=18\": \"18\",\n", - " \"model_layers=34\": \"34\",\n", - " \"model_layers=50\": \"50\",\n", - " \"model_layers=101\": \"101\",\n", - " \"model_layers=152\": \"152\",\n", - "}\n", - "pareto_weibull = plot_partial_effects(\n", - " file=\"weibull_partial_effects.pdf\",\n", - " aft=wft,\n", - " covariate_array=\"model_layers\",\n", - " values_array=[18, 34, 50, 101, 152],\n", - " title=\"Partial Effects of No. of Layers on Failure Rate for Weibull AFR\",\n", - " replacement_dict=pareto_dict,\n", - " ylabel=\"% Chance of Survival\",\n", - " xlabel=\"Time $T$ (seconds)\",\n", - " legend_kwargs={\n", - " \"title\": \"No. of Layers\",\n", - " \"labels\": [\"18\", \"34\", \"50\", \"101\", \"152\"],\n", - " },\n", - ")\n", - "\n", - "# weibull_accuracy = plot_partial_effects(\n", - "# file = \"weibull_partial_effect_accuracy.pdf\",\n", - "# aft = wft,\n", - "# covariate_array = \"accuracy\",\n", - "# values_array = [.9, .99, .999, .9999],\n", - "# replacement_dict=weibull_dict,\n", - "# title=\"Partial Effects of Benign Accuracy on Failure Rate\",\n", - "# ylabel=\"% Chance of Survival\",\n", - "# xlabel=\"Time $T$ (seconds)\",\n", - "# legend = {\"title\" : \"Benign Accuracy\"},\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/cmeyers/deckard/env/lib/python3.8/site-packages/lifelines/fitters/coxph_fitter.py:1614: ConvergenceWarning: Newton-Raphson failed to converge sufficiently. Please see the following tips in the lifelines documentation: https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modellifelines.CoxPHFitter
duration col'adv_fit_time'
event col'adv_failures'
baseline estimationbreslow
number of observations1500
number of events observed1500
partial log-likelihood-7421.70
time fit was run2023-09-29 11:13:28 UTC
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
coefexp(coef)se(coef)coef lower 95%coef upper 95%exp(coef) lower 95%exp(coef) upper 95%cmp tozp-log2(p)
train_time-0.001.000.00-0.00-0.001.001.000.00-4.25<0.00515.52
predict_time0.031.030.010.020.041.021.040.004.21<0.00515.26
atk_value-0.090.910.07-0.230.050.801.050.00-1.310.192.40
def_value0.041.050.07-0.090.180.911.200.000.630.530.92
data.sample.random_state-0.010.990.01-0.030.000.971.000.00-1.570.123.10
adv_failure_rate0.011.010.000.010.011.011.010.0028.70<0.005599.53
model_layers-0.001.000.00-0.00-0.001.001.000.00-5.20<0.00522.23
model.art.pipeline.initialize.kwargs.optimizer.lr-0.001.000.00-0.000.001.001.000.00-0.130.900.15

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Concordance0.92
Partial AIC14859.40
log-likelihood ratio test4105.41 on 8 df
-log2(p) of ll-ratio testinf
\n", - "
" - ], - "text/latex": [ - "\\begin{tabular}{lrrrrrrrrrrr}\n", - " & coef & exp(coef) & se(coef) & coef lower 95% & coef upper 95% & exp(coef) lower 95% & exp(coef) upper 95% & cmp to & z & p & -log2(p) \\\\\n", - "covariate & & & & & & & & & & & \\\\\n", - "train_time & -0.00 & 1.00 & 0.00 & -0.00 & -0.00 & 1.00 & 1.00 & 0.00 & -4.25 & 0.00 & 15.52 \\\\\n", - "predict_time & 0.03 & 1.03 & 0.01 & 0.02 & 0.04 & 1.02 & 1.04 & 0.00 & 4.21 & 0.00 & 15.26 \\\\\n", - "atk_value & -0.09 & 0.91 & 0.07 & -0.23 & 0.05 & 0.80 & 1.05 & 0.00 & -1.31 & 0.19 & 2.40 \\\\\n", - "def_value & 0.04 & 1.05 & 0.07 & -0.09 & 0.18 & 0.91 & 1.20 & 0.00 & 0.63 & 0.53 & 0.92 \\\\\n", - "data.sample.random_state & -0.01 & 0.99 & 0.01 & -0.03 & 0.00 & 0.97 & 1.00 & 0.00 & -1.57 & 0.12 & 3.10 \\\\\n", - "adv_failure_rate & 0.01 & 1.01 & 0.00 & 0.01 & 0.01 & 1.01 & 1.01 & 0.00 & 28.70 & 0.00 & 599.53 \\\\\n", - "model_layers & -0.00 & 1.00 & 0.00 & -0.00 & -0.00 & 1.00 & 1.00 & 0.00 & -5.20 & 0.00 & 22.23 \\\\\n", - "model.art.pipeline.initialize.kwargs.optimizer.lr & -0.00 & 1.00 & 0.00 & -0.00 & 0.00 & 1.00 & 1.00 & 0.00 & -0.13 & 0.90 & 0.15 \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\n", - " duration col = 'adv_fit_time'\n", - " event col = 'adv_failures'\n", - " baseline estimation = breslow\n", - " number of observations = 1500\n", - "number of events observed = 1500\n", - " partial log-likelihood = -7421.70\n", - " time fit was run = 2023-09-29 11:13:28 UTC\n", - "\n", - "---\n", - " coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%\n", - "covariate \n", - "train_time -0.00 1.00 0.00 -0.00 -0.00 1.00 1.00\n", - "predict_time 0.03 1.03 0.01 0.02 0.04 1.02 1.04\n", - "atk_value -0.09 0.91 0.07 -0.23 0.05 0.80 1.05\n", - "def_value 0.04 1.05 0.07 -0.09 0.18 0.91 1.20\n", - "data.sample.random_state -0.01 0.99 0.01 -0.03 0.00 0.97 1.00\n", - "adv_failure_rate 0.01 1.01 0.00 0.01 0.01 1.01 1.01\n", - "model_layers -0.00 1.00 0.00 -0.00 
-0.00 1.00 1.00\n", - "model.art.pipeline.initialize.kwargs.optimizer.lr -0.00 1.00 0.00 -0.00 0.00 1.00 1.00\n", - "\n", - " cmp to z p -log2(p)\n", - "covariate \n", - "train_time 0.00 -4.25 <0.005 15.52\n", - "predict_time 0.00 4.21 <0.005 15.26\n", - "atk_value 0.00 -1.31 0.19 2.40\n", - "def_value 0.00 0.63 0.53 0.92\n", - "data.sample.random_state 0.00 -1.57 0.12 3.10\n", - "adv_failure_rate 0.00 28.70 <0.005 599.53\n", - "model_layers 0.00 -5.20 <0.005 22.23\n", - "model.art.pipeline.initialize.kwargs.optimizer.lr 0.00 -0.13 0.90 0.15\n", - "---\n", - "Concordance = 0.92\n", - "Partial AIC = 14859.40\n", - "log-likelihood ratio test = 4105.41 on 8 df\n", - "-log2(p) of ll-ratio test = inf" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_773113/12050270.py:64: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " pareto.set_yticklabels(labels)\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cox_dict = {\n", - " \"adv_failure_rate\": \"Adv. Failure Rate\",\n", - " \"def_value\": \"Defence Strength\",\n", - " \"data.sample.random_state\": \"Random State\",\n", - " \"train_time\": \"Training Time\",\n", - " \"model_layers\": \"No. of Layers\",\n", - " \"model.art.pipeline.initialize.kwargs.optimizer.lr\": \"Learning Rate\",\n", - " \"adv_accuracy\": \"Adv. Accuracy\",\n", - " \"adv_fit_time\": \"Adv. Fit Time\",\n", - " \"adv_log_loss\": \"Adv. Log Loss\",\n", - " \"predict_time\": \"Inference Time\",\n", - " \"accuracy\": \"Ben. Accuracy\",\n", - " \"failure_rate\": \"Ben. Failure Rate\",\n", - " \"atk_value\": \"Attack Strength\",\n", - "}\n", - "\n", - "cox_afr, cft = plot_aft(\n", - " X_train,\n", - " file=\"cox_aft.pdf\",\n", - " event_col=target,\n", - " duration_col=duration_col,\n", - " title=\"Cox AFR Model\",\n", - " mtype=\"cox\",\n", - " replacement_dict=cox_dict,\n", - ")\n", - "cox_scores = score_model(cft, X_train, X_test)\n", - "cft.print_summary()\n", - "cox_partial = plot_partial_effects(\n", - " file=\"cox_partial_effects.pdf\",\n", - " aft=cft,\n", - " covariate_array=\"model_layers\",\n", - " values_array=[18, 34, 50, 101, 152],\n", - " replacement_dict=cox_dict,\n", - " title=\"Survival Time for Cox AFR\",\n", - " ylabel=\"% Chance of Survival\",\n", - " xlabel=\"Time $T$ (seconds)\",\n", - " legend_kwargs={\n", - " \"title\": \"No. of Layers\",\n", - " \"labels\": [\"18\", \"34\", \"50\", \"101\", \"152\"],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modellifelines.LogNormalAFTFitter
duration col'adv_fit_time'
event col'adv_failures'
number of observations1500
number of events observed1500
log-likelihood-5374.54
time fit was run2023-09-29 11:13:31 UTC
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
coefexp(coef)se(coef)coef lower 95%coef upper 95%exp(coef) lower 95%exp(coef) upper 95%cmp tozp-log2(p)
mu_adv_failure_rate-0.001.000.00-0.00-0.001.001.000.00-31.84<0.005736.80
atk_value0.081.090.16-0.220.390.801.480.000.520.600.74
data.sample.random_state0.021.020.02-0.020.060.981.060.000.830.411.30
def_value-0.160.850.16-0.480.160.621.170.00-0.980.321.62
model.art.pipeline.initialize.kwargs.optimizer.lr-0.001.000.00-0.000.001.001.000.00-0.060.950.07
model_layers0.011.010.000.010.011.011.010.007.13<0.00539.81
predict_time-0.210.810.02-0.26-0.160.770.850.00-8.37<0.00553.94
train_time0.001.000.000.000.001.001.000.007.13<0.00539.89
Intercept2.027.540.171.682.365.3710.610.0011.63<0.005101.36
sigma_Intercept0.842.310.020.800.872.232.390.0045.86<0.005inf

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Concordance0.84
AIC10769.08
log-likelihood ratio test925.72 on 8 df
-log2(p) of ll-ratio test643.78
\n", - "
" - ], - "text/latex": [ - "\\begin{tabular}{llrrrrrrrrrrr}\n", - " & & coef & exp(coef) & se(coef) & coef lower 95% & coef upper 95% & exp(coef) lower 95% & exp(coef) upper 95% & cmp to & z & p & -log2(p) \\\\\n", - "param & covariate & & & & & & & & & & & \\\\\n", - "\\multirow[c]{9}{*}{mu_} & adv_failure_rate & -0.00 & 1.00 & 0.00 & -0.00 & -0.00 & 1.00 & 1.00 & 0.00 & -31.84 & 0.00 & 736.80 \\\\\n", - " & atk_value & 0.08 & 1.09 & 0.16 & -0.22 & 0.39 & 0.80 & 1.48 & 0.00 & 0.52 & 0.60 & 0.74 \\\\\n", - " & data.sample.random_state & 0.02 & 1.02 & 0.02 & -0.02 & 0.06 & 0.98 & 1.06 & 0.00 & 0.83 & 0.41 & 1.30 \\\\\n", - " & def_value & -0.16 & 0.85 & 0.16 & -0.48 & 0.16 & 0.62 & 1.17 & 0.00 & -0.98 & 0.32 & 1.62 \\\\\n", - " & model.art.pipeline.initialize.kwargs.optimizer.lr & -0.00 & 1.00 & 0.00 & -0.00 & 0.00 & 1.00 & 1.00 & 0.00 & -0.06 & 0.95 & 0.07 \\\\\n", - " & model_layers & 0.01 & 1.01 & 0.00 & 0.01 & 0.01 & 1.01 & 1.01 & 0.00 & 7.13 & 0.00 & 39.81 \\\\\n", - " & predict_time & -0.21 & 0.81 & 0.02 & -0.26 & -0.16 & 0.77 & 0.85 & 0.00 & -8.37 & 0.00 & 53.94 \\\\\n", - " & train_time & 0.00 & 1.00 & 0.00 & 0.00 & 0.00 & 1.00 & 1.00 & 0.00 & 7.13 & 0.00 & 39.89 \\\\\n", - " & Intercept & 2.02 & 7.54 & 0.17 & 1.68 & 2.36 & 5.37 & 10.61 & 0.00 & 11.63 & 0.00 & 101.36 \\\\\n", - "sigma_ & Intercept & 0.84 & 2.31 & 0.02 & 0.80 & 0.87 & 2.23 & 2.39 & 0.00 & 45.86 & 0.00 & inf \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\n", - " duration col = 'adv_fit_time'\n", - " event col = 'adv_failures'\n", - " number of observations = 1500\n", - "number of events observed = 1500\n", - " log-likelihood = -5374.54\n", - " time fit was run = 2023-09-29 11:13:31 UTC\n", - "\n", - "---\n", - " coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%\n", - "param covariate \n", - "mu_ adv_failure_rate -0.00 1.00 0.00 -0.00 -0.00 1.00 1.00\n", - " atk_value 0.08 1.09 0.16 -0.22 0.39 0.80 1.48\n", - " data.sample.random_state 0.02 
1.02 0.02 -0.02 0.06 0.98 1.06\n", - " def_value -0.16 0.85 0.16 -0.48 0.16 0.62 1.17\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr -0.00 1.00 0.00 -0.00 0.00 1.00 1.00\n", - " model_layers 0.01 1.01 0.00 0.01 0.01 1.01 1.01\n", - " predict_time -0.21 0.81 0.02 -0.26 -0.16 0.77 0.85\n", - " train_time 0.00 1.00 0.00 0.00 0.00 1.00 1.00\n", - " Intercept 2.02 7.54 0.17 1.68 2.36 5.37 10.61\n", - "sigma_ Intercept 0.84 2.31 0.02 0.80 0.87 2.23 2.39\n", - "\n", - " cmp to z p -log2(p)\n", - "param covariate \n", - "mu_ adv_failure_rate 0.00 -31.84 <0.005 736.80\n", - " atk_value 0.00 0.52 0.60 0.74\n", - " data.sample.random_state 0.00 0.83 0.41 1.30\n", - " def_value 0.00 -0.98 0.32 1.62\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr 0.00 -0.06 0.95 0.07\n", - " model_layers 0.00 7.13 <0.005 39.81\n", - " predict_time 0.00 -8.37 <0.005 53.94\n", - " train_time 0.00 7.13 <0.005 39.89\n", - " Intercept 0.00 11.63 <0.005 101.36\n", - "sigma_ Intercept 0.00 45.86 <0.005 inf\n", - "---\n", - "Concordance = 0.84\n", - "AIC = 10769.08\n", - "log-likelihood ratio test = 925.72 on 8 df\n", - "-log2(p) of ll-ratio test = 643.78" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_773113/12050270.py:64: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " pareto.set_yticklabels(labels)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "log_normal_dict = {\n", - " \"Intercept: sigma_\": \"$\\sigma$\",\n", - " \"Intercept: mu_\": \"$\\mu$\",\n", - " \"def_value: mu_\": \"Defence Strength\",\n", - " \"atk_value: mu_\": \"Attack Strength\",\n", - " \"train_time: mu_\": \"Training Time\",\n", - " \"predict_time: mu_\": \"Inference Time\",\n", - " \"adv_fit_time: mu_\": \"Adv. Fit Time\",\n", - " \"model_layers: mu_\": \"No. of Layers\",\n", - " \"model.art.pipeline.initialize.kwargs.optimizer.lr: mu_\": \"Learning Rate\",\n", - " \"data.sample.random_state: mu_\": \"Random State\",\n", - " \"adv_log_loss: mu_\": \"Adv. Log Loss\",\n", - " \"adv_accuracy: mu_\": \"Adv. Accuracy\",\n", - " \"accuracy: mu_\": \"Ben. Accuracy\",\n", - " \"adv_failure_rate: mu_\": \"Adv. Failure Rate\",\n", - " \"def_gen\": \"Defence\",\n", - " \"learning_rate: mu_\": \"Learning Rate\",\n", - "}\n", - "\n", - "log_normal_graph, lnt = plot_aft(\n", - " X_train,\n", - " \"log_normal_aft.pdf\",\n", - " target,\n", - " duration_col,\n", - " \"Log Normal AFR Model\",\n", - " \"log_normal\",\n", - " replacement_dict=log_normal_dict,\n", - ")\n", - "lnt_scores = score_model(lnt, X_train, X_test)\n", - "lnt.print_summary()\n", - "lnt_partial = plot_partial_effects(\n", - " file=\"log_normal_partial_effects.pdf\",\n", - " aft=lnt,\n", - " covariate_array=\"model_layers\",\n", - " values_array=[18, 34, 50, 101, 152],\n", - " replacement_dict=log_normal_dict,\n", - " title=\"Survival Time for Log-Normal AFR\",\n", - " ylabel=\"% Chance of Survival\",\n", - " xlabel=\"Time $T$ (seconds)\",\n", - " legend_kwargs={\n", - " \"title\": \"No. of Layers\",\n", - " \"labels\": [\"18\", \"34\", \"50\", \"101\", \"152\"],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modellifelines.LogLogisticAFTFitter
duration col'adv_fit_time'
event col'adv_failures'
number of observations1500
number of events observed1500
log-likelihood-5426.84
time fit was run2023-09-29 11:13:33 UTC
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
coefexp(coef)se(coef)coef lower 95%coef upper 95%exp(coef) lower 95%exp(coef) upper 95%cmp tozp-log2(p)
alpha_adv_failure_rate-0.001.000.00-0.00-0.001.001.000.00-26.62<0.005516.20
atk_value0.071.070.17-0.260.400.771.490.000.420.670.58
data.sample.random_state0.021.020.02-0.020.060.981.070.000.910.361.47
def_value-0.190.820.18-0.540.150.581.160.00-1.110.271.90
model.art.pipeline.initialize.kwargs.optimizer.lr0.001.000.00-0.000.001.001.000.000.030.970.04
model_layers0.011.010.000.010.021.011.020.007.88<0.00548.13
predict_time-0.310.740.04-0.38-0.240.690.790.00-8.73<0.00558.49
train_time0.001.000.000.000.001.001.000.006.12<0.00530.02
Intercept1.886.540.191.512.254.529.460.009.97<0.00575.31
beta_Intercept-0.320.720.02-0.36-0.280.690.750.00-15.46<0.005176.59

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Concordance0.82
AIC10873.68
log-likelihood ratio test961.93 on 8 df
-log2(p) of ll-ratio test669.73
\n", - "
" - ], - "text/latex": [ - "\\begin{tabular}{llrrrrrrrrrrr}\n", - " & & coef & exp(coef) & se(coef) & coef lower 95% & coef upper 95% & exp(coef) lower 95% & exp(coef) upper 95% & cmp to & z & p & -log2(p) \\\\\n", - "param & covariate & & & & & & & & & & & \\\\\n", - "\\multirow[c]{9}{*}{alpha_} & adv_failure_rate & -0.00 & 1.00 & 0.00 & -0.00 & -0.00 & 1.00 & 1.00 & 0.00 & -26.62 & 0.00 & 516.20 \\\\\n", - " & atk_value & 0.07 & 1.07 & 0.17 & -0.26 & 0.40 & 0.77 & 1.49 & 0.00 & 0.42 & 0.67 & 0.58 \\\\\n", - " & data.sample.random_state & 0.02 & 1.02 & 0.02 & -0.02 & 0.06 & 0.98 & 1.07 & 0.00 & 0.91 & 0.36 & 1.47 \\\\\n", - " & def_value & -0.19 & 0.82 & 0.18 & -0.54 & 0.15 & 0.58 & 1.16 & 0.00 & -1.11 & 0.27 & 1.90 \\\\\n", - " & model.art.pipeline.initialize.kwargs.optimizer.lr & 0.00 & 1.00 & 0.00 & -0.00 & 0.00 & 1.00 & 1.00 & 0.00 & 0.03 & 0.97 & 0.04 \\\\\n", - " & model_layers & 0.01 & 1.01 & 0.00 & 0.01 & 0.02 & 1.01 & 1.02 & 0.00 & 7.88 & 0.00 & 48.13 \\\\\n", - " & predict_time & -0.31 & 0.74 & 0.04 & -0.38 & -0.24 & 0.69 & 0.79 & 0.00 & -8.73 & 0.00 & 58.49 \\\\\n", - " & train_time & 0.00 & 1.00 & 0.00 & 0.00 & 0.00 & 1.00 & 1.00 & 0.00 & 6.12 & 0.00 & 30.02 \\\\\n", - " & Intercept & 1.88 & 6.54 & 0.19 & 1.51 & 2.25 & 4.52 & 9.46 & 0.00 & 9.97 & 0.00 & 75.31 \\\\\n", - "beta_ & Intercept & -0.32 & 0.72 & 0.02 & -0.36 & -0.28 & 0.69 & 0.75 & 0.00 & -15.46 & 0.00 & 176.59 \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\n", - " duration col = 'adv_fit_time'\n", - " event col = 'adv_failures'\n", - " number of observations = 1500\n", - "number of events observed = 1500\n", - " log-likelihood = -5426.84\n", - " time fit was run = 2023-09-29 11:13:33 UTC\n", - "\n", - "---\n", - " coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%\n", - "param covariate \n", - "alpha_ adv_failure_rate -0.00 1.00 0.00 -0.00 -0.00 1.00 1.00\n", - " atk_value 0.07 1.07 0.17 -0.26 0.40 0.77 1.49\n", - " 
data.sample.random_state 0.02 1.02 0.02 -0.02 0.06 0.98 1.07\n", - " def_value -0.19 0.82 0.18 -0.54 0.15 0.58 1.16\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr 0.00 1.00 0.00 -0.00 0.00 1.00 1.00\n", - " model_layers 0.01 1.01 0.00 0.01 0.02 1.01 1.02\n", - " predict_time -0.31 0.74 0.04 -0.38 -0.24 0.69 0.79\n", - " train_time 0.00 1.00 0.00 0.00 0.00 1.00 1.00\n", - " Intercept 1.88 6.54 0.19 1.51 2.25 4.52 9.46\n", - "beta_ Intercept -0.32 0.72 0.02 -0.36 -0.28 0.69 0.75\n", - "\n", - " cmp to z p -log2(p)\n", - "param covariate \n", - "alpha_ adv_failure_rate 0.00 -26.62 <0.005 516.20\n", - " atk_value 0.00 0.42 0.67 0.58\n", - " data.sample.random_state 0.00 0.91 0.36 1.47\n", - " def_value 0.00 -1.11 0.27 1.90\n", - " model.art.pipeline.initialize.kwargs.optimizer.lr 0.00 0.03 0.97 0.04\n", - " model_layers 0.00 7.88 <0.005 48.13\n", - " predict_time 0.00 -8.73 <0.005 58.49\n", - " train_time 0.00 6.12 <0.005 30.02\n", - " Intercept 0.00 9.97 <0.005 75.31\n", - "beta_ Intercept 0.00 -15.46 <0.005 176.59\n", - "---\n", - "Concordance = 0.82\n", - "AIC = 10873.68\n", - "log-likelihood ratio test = 961.93 on 8 df\n", - "-log2(p) of ll-ratio test = 669.73" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'train_score': -3.617893432558881, 'test_score': -3.971188942813805}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_773113/12050270.py:64: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " pareto.set_yticklabels(labels)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "log_logistic_dict = {\n", - " \"Intercept: beta_\": \"$\\\\beta$\",\n", - " \"Intercept: alpha_\": \"$\\\\alpha$\",\n", - " \"data.sample.random_state: alpha_\": \"Random State\",\n", - " \"def_value: alpha_\": \"Defence Strength\",\n", - " \"atk_value: alpha_\": \"Attack Strength\",\n", - " \"train_time: alpha_\": \"Training Time\",\n", - " \"predict_time: alpha_\": \"Inference Time\",\n", - " \"adv_accuracy: alpha_\": \"Adv. Accuracy\",\n", - " \"accuracy: alpha_\": \"Ben. Accuracy\",\n", - " \"adv_fit_time: alpha_\": \"Adv. Fit Time\",\n", - " \"model_layers: alpha_\": \"No. of Layers\",\n", - " \"model.art.pipeline.initialize.kwargs.optimizer.lr\": \"Learning Rate\",\n", - " \"adv_failure_rate: alpha_\": \"Adv. Failure Rate\",\n", - " \"alpha_\": \"\",\n", - "}\n", - "\n", - "log_logistic_graph, llt = plot_aft(\n", - " X_train,\n", - " \"log_logistic_aft.pdf\",\n", - " target,\n", - " duration_col,\n", - " \"Log Logistic AFR Model\",\n", - " \"log_logistic\",\n", - " replacement_dict=log_logistic_dict,\n", - ")\n", - "llt.print_summary()\n", - "llt_scores = score_model(llt, X_train, X_test)\n", - "print(llt_scores)\n", - "llt_partial = plot_partial_effects(\n", - " file=\"log_logistic_partial_effects.pdf\",\n", - " aft=llt,\n", - " covariate_array=\"model_layers\",\n", - " values_array=[18, 34, 50, 101, 152],\n", - " replacement_dict=log_logistic_dict,\n", - " title=\"Survival Time for Log-Logistic AFR\",\n", - " ylabel=\"% Chance of Survival\",\n", - " xlabel=\"Time $T$ (seconds)\",\n", - " legend_kwargs={\n", - " \"title\": \"No. 
of Layers\",\n", - " \"labels\": [\"18\", \"34\", \"50\", \"101\", \"152\"],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "8.62393284274078" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.mean(llt.predict_median(X_train))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AICConcordanceBICTrain LLTest LLMean STMedian ST
Weibull11082.520.8411082.52-3.69-4.0071.5310.23
LogNormal10769.080.8410769.08-3.58-3.92122.817.79
LogLogistic10873.680.8210873.68-3.62-3.97NaN6.62
\n", - "
" - ], - "text/plain": [ - " AIC Concordance BIC Train LL Test LL Mean ST \\\n", - "Weibull 11082.52 0.84 11082.52 -3.69 -4.00 71.53 \n", - "LogNormal 10769.08 0.84 10769.08 -3.58 -3.92 122.81 \n", - "LogLogistic 10873.68 0.82 10873.68 -3.62 -3.97 NaN \n", - "\n", - " Median ST \n", - "Weibull 10.23 \n", - "LogNormal 7.79 \n", - "LogLogistic 6.62 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "aft_dict = {\n", - " \"Weibull\": wft,\n", - " \"LogNormal\": lnt,\n", - " \"LogLogistic\": llt,\n", - " # \"Cox\": cft,\n", - "}\n", - "\n", - "score_list = [\n", - " wft_scores,\n", - " lnt_scores,\n", - " llt_scores,\n", - " # cft_scores,\n", - "]\n", - "aft_data = pd.DataFrame()\n", - "aft_data.index.name = \"Model\"\n", - "aft_data.index = aft_dict.keys()\n", - "aft_data[\"AIC\"] = [\n", - " x.AIC_ if not isinstance(x, CoxPHFitter) else np.nan for x in aft_dict.values()\n", - "]\n", - "aft_data[\"Concordance\"] = [x.concordance_index_ for x in aft_dict.values()]\n", - "aft_data[\"BIC\"] = [\n", - " x.AIC_ if not isinstance(x, CoxPHFitter) else np.nan for x in aft_dict.values()\n", - "]\n", - "aft_data[\"Train LL\"] = [x[\"train_score\"] for x in score_list]\n", - "aft_data[\"Test LL\"] = [x[\"test_score\"] for x in score_list]\n", - "aft_data[\"Mean ST\"] = [x.predict_expectation(X_train).mean() for x in aft_dict.values()]\n", - "aft_data[\"Median ST\"] = [x.predict_median(X_train).median() for x in aft_dict.values()]\n", - "aft_data = aft_data.round(2)\n", - "aft_data.to_csv(FOLDER / \"aft_comparison.csv\")\n", - "logger.info(f\"Saved AFT comparison to {FOLDER / 'aft_comparison.csv'}\")\n", - "aft_data = aft_data.round(\n", - " 2,\n", - ")\n", - "aft_data.to_latex(\n", - " FOLDER / \"aft_comparison.tex\",\n", - " float_format=\"%.2f\",\n", - " label=\"tab:mnist\",\n", - " caption=\"Comparison of AFR Models on the MNIST dataset.\",\n", - ")\n", - "aft_data" - ] - } - ], - "metadata": { - "kernelspec": { - 
"display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/security/classification/plots/.gitignore b/examples/security/classification/plots/.gitignore deleted file mode 100644 index 4c882c2e..00000000 --- a/examples/security/classification/plots/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -/accuracy_vs_attack_parameters.eps -/accuracy_vs_features.eps -/accuracy_vs_samples.eps -/confidence_vs_attack_parameters.eps -/train_time_vs_attack_parameters.eps -/train_time_vs_features.eps -/train_time_vs_samples.eps -/retrain_accuracy.eps -/retrain_confidence_vs_attack_parameters.eps -/retrain_time.eps diff --git a/examples/security/truthseeker/conf/data/truthseeker.yaml b/examples/security/truthseeker/conf/data/truthseeker.yaml index 7fdb84a9..18a3d226 100644 --- a/examples/security/truthseeker/conf/data/truthseeker.yaml +++ b/examples/security/truthseeker/conf/data/truthseeker.yaml @@ -1,15 +1,15 @@ _target_: deckard.base.data.Data generate: # _target_: deckard.base.data.generator.DataGenerator - name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/379b679bdea30724e9fa188931f0109ff422cce0/kdd_nsl.csv - target : -2 + name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/34e546e4813f154d11d4f13869b9e3481fc3e829/truthseeker.csv + target : BotScoreBinary sample: # _target_: deckard.base.data.sampler.SklearnDataSampler random_state : 0 stratify: True train_size : 10000 test_size : 1000 -ssklearn_pipeline: +sklearn_pipeline: encoder: name : sklearn.preprocessing.OrdinalEncoder handle_unknown : use_encoded_value diff --git 
a/examples/sklearn/.gitignore b/examples/sklearn/.gitignore index 900aa55a..c9066ade 100644 --- a/examples/sklearn/.gitignore +++ b/examples/sklearn/.gitignore @@ -3,3 +3,4 @@ /model_grid.db /attack_grid.db /output +/multirun diff --git a/setup.py b/setup.py index 65317ddb..c75633e1 100644 --- a/setup.py +++ b/setup.py @@ -11,26 +11,23 @@ "numpy", "scipy", "scikit-learn", - "six", - "setuptools", "tqdm", "numba", "pillow", - "wheel", "pandas", "pre-commit", "PyYAML", "pytest", "validators", - "yellowbrick", "hydra-core", "hydra-optuna-sweeper", "hydra-joblib-launcher", - "hydra-rq-launcher", "sqlalchemy<=1.4.46", "dvc", "paretoset", "lifelines", + "seaborn", + "jinja2", ] test_requires = [ "pytest", @@ -184,6 +181,8 @@ def get_version(rel_path): "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Scientific/Engineering :: Artificial Intelligence", diff --git a/test/base/test_attack/test_attack.py b/test/base/test_attack/test_attack.py index cd486389..d5c26f13 100644 --- a/test/base/test_attack/test_attack.py +++ b/test/base/test_attack/test_attack.py @@ -46,10 +46,10 @@ def tearDown(self) -> None: rmtree(self.dir) -class testPoisoningAttackInitializer(testAttackInitializer): - config_dir = Path(this_dir, "../../conf/attack").resolve().as_posix() - config_file = "poisoning.yaml" - file = "attack.pkl" +# class testPoisoningAttackInitializer(testAttackInitializer): +# config_dir = Path(this_dir, "../../conf/attack").resolve().as_posix() +# config_file = "poisoning.yaml" +# file = "attack.pkl" # class testInferenceAttackInitializer(testAttackInitializer): diff --git a/test/base/test_attack/torch_example.py b/test/base/test_attack/torch_example.py index 59554442..9cfc5912 100644 --- 
a/test/base/test_attack/torch_example.py +++ b/test/base/test_attack/torch_example.py @@ -343,5 +343,11 @@ def resnet18(pretrained=False, progress=True, device="cpu", **kwargs): progress (bool): If True, displays a progress bar of the download to stderr """ return _resnet( - "resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, device, **kwargs + "resnet18", + BasicBlock, + [2, 2, 2, 2], + pretrained, + progress, + device, + **kwargs, ) diff --git a/test/base/test_data/test_data.py b/test/base/test_data/test_data.py index 5f41cf1d..9c82279d 100644 --- a/test/base/test_data/test_data.py +++ b/test/base/test_data/test_data.py @@ -3,7 +3,7 @@ from tempfile import mkdtemp from shutil import rmtree import os -import numpy as np +from numpy import ndarray from hydra import initialize_config_dir, compose from hydra.utils import instantiate from pandas import DataFrame, Series @@ -35,10 +35,10 @@ def test_init(self): def test_call(self): filename = Path(self.directory, self.data_file + self.data_type).as_posix() X_train, X_test, y_train, y_test = self.data(data_file=filename) - self.assertIsInstance(X_train, np.ndarray) - self.assertIsInstance(X_test, np.ndarray) - self.assertIsInstance(y_train, np.ndarray) - self.assertIsInstance(y_test, np.ndarray) + self.assertIsInstance(X_train, ndarray) + self.assertIsInstance(X_test, ndarray) + self.assertIsInstance(y_train, ndarray) + self.assertIsInstance(y_test, ndarray) self.assertEqual(X_train.shape[0], y_train.shape[0]) self.assertEqual(X_test.shape[0], y_test.shape[0]) self.assertTrue(Path(filename).exists()) diff --git a/test/base/test_experiment/torch_example.py b/test/base/test_experiment/torch_example.py index e7479cb8..2ece33fe 100644 --- a/test/base/test_experiment/torch_example.py +++ b/test/base/test_experiment/torch_example.py @@ -343,5 +343,11 @@ def resnet18(pretrained=False, progress=True, device="cpu", **kwargs): progress (bool): If True, displays a progress bar of the download to stderr """ return _resnet( 
- "resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, device, **kwargs + "resnet18", + BasicBlock, + [2, 2, 2, 2], + pretrained, + progress, + device, + **kwargs, ) diff --git a/test/base/test_model/torch_example.py b/test/base/test_model/torch_example.py index e7479cb8..2ece33fe 100644 --- a/test/base/test_model/torch_example.py +++ b/test/base/test_model/torch_example.py @@ -343,5 +343,11 @@ def resnet18(pretrained=False, progress=True, device="cpu", **kwargs): progress (bool): If True, displays a progress bar of the download to stderr """ return _resnet( - "resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, device, **kwargs + "resnet18", + BasicBlock, + [2, 2, 2, 2], + pretrained, + progress, + device, + **kwargs, ) diff --git a/test/conf/attack/poisoning.yaml b/test/conf/attack/poisoning.yaml index b05834e4..3d86793a 100644 --- a/test/conf/attack/poisoning.yaml +++ b/test/conf/attack/poisoning.yaml @@ -10,28 +10,24 @@ data: n_redundant: 0 n_repeated: 0 sample: - _target_: deckard.base.data.sampler.SklearnDataSampler - random_state : 0 - stratify: True - train_size : .8 - test_size : .2 + random_state : 0 + stratify: True + train_size : .8 + test_size : .2 model: data : ${data} init: - _target_: deckard.base.model.ModelInitializer name: torch_example.LogisticRegression input_dim : - ${data.generate.n_features} output_dim : - ${data.generate.n_classes} - _target_: deckard.base.model.Model trainer: nb_epoch: 1 batch_size: 1024 library : torch art: library : torch - _target_ : deckard.base.model.art_pipeline.ArtPipeline initialize: criterion: name : "torch.nn.CrossEntropyLoss" @@ -40,7 +36,6 @@ model: lr : 0.01 momentum : 0.9 nb_classes : ${data.generate.n_classes} -_target_ : deckard.base.attack.Attack init: name: art.attacks.poisoning.GradientMatchingAttack model: ${model} diff --git a/test/conf/data/titanic.yaml b/test/conf/data/titanic.yaml index 40bf66b5..58111e6d 100644 --- a/test/conf/data/titanic.yaml +++ b/test/conf/data/titanic.yaml @@ 
-1,8 +1,3 @@ _target_: deckard.base.data.Data name: https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv target : Survived -sklearn_pipeline: - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: True - with_std: True