diff --git a/deckard/__main__.py b/deckard/__main__.py
index cbd1505f..87a38abc 100644
--- a/deckard/__main__.py
+++ b/deckard/__main__.py
@@ -1,106 +1,80 @@
#!/usr/bin/env python3
-import argparse
-import subprocess
+import sys
import logging
-from pathlib import Path
from omegaconf import OmegaConf
-from .layers.parse import save_params_file
+from .layers.afr import afr_parser, afr_main
+from .layers.attack import attack_parser, attack_main
+from .layers.clean_data import clean_data_parser, clean_data_main
+from .layers.compile import compile_parser, compile_main
+from .layers.data import data_parser, data_main
+from .layers.experiment import experiment_parser, experiment_main
+from .layers.find_best import find_best_parser, find_best_main
+from .layers.generate_grid import generate_grid_parser, generate_grid_main
+from .layers.hydra_test import hydra_test_main
+from .layers.merge import merge_parser, merge_main
+from .layers.optimise import optimise_main
+from .layers.parse import hydra_parser, parse_hydra_config
+from .layers.plots import plots_parser, plots_main
+from .layers.prepare_queue import prepare_queue_main
+from .layers.query_kepler import kepler_parser, kepler_main
OmegaConf.register_new_resolver("eval", eval)
logger = logging.getLogger(__name__)
-layer_list = list(Path(Path(__file__).parent, "layers").glob("*.py"))
-layer_list = [layer.stem for layer in layer_list]
-if "__init__" in layer_list:
- layer_list.remove("__init__")
-layer_list.append(None)
+# Names of every layer that can be selected on the command line.
+# One string per line with a trailing comma: two adjacent string literals are
+# silently concatenated by Python, which would merge two layer names into one
+# bogus entry and shorten the list.
+layer_list = [
+    "afr",
+    "attack",
+    "clean_data",
+    "compile",
+    "data",
+    "experiment",
+    "find_best",
+    "generate_grid",
+    "hydra_test",
+    "merge",
+    "optimise",
+    "parse",
+    "plots",
+    "prepare_queue",
+    "query_kepler",
+]
-def run_submodule(submodule, args):
- if len(args) == 0:
- cmd = f"python -m deckard.layers.{submodule}"
- else:
- cmd = f"python -m deckard.layers.{submodule} {args}"
- logger.info(f"Running {cmd}")
- with subprocess.Popen(
- cmd,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- shell=True,
- ) as proc:
- for line in proc.stdout:
- print(line.rstrip().decode("utf-8"))
- if proc.returncode != 0:
- logger.error(f"Error running {cmd}")
- for line in proc.stderr:
- logger.error(line.rstrip().decode("utf-8"))
- return 1
- else:
- return 0
+# Dispatch table: layer name -> (argument parser, entry-point function).
+# A parser of None marks a layer that is registered without an argparse parser.
+# NOTE(review): any code that calls `parser.parse_args` must handle None first.
+# NOTE(review): `main` calls the entry point with the parsed namespace as its
+# only argument, but `compile_main(parse_results, save_results, args)` and
+# `find_best_main(find_optuna_best, args)` declare extra leading parameters,
+# and `hydra_test_main()` declares none -- confirm these signatures line up.
+deckard_layer_dict = {
+ "afr": (afr_parser, afr_main),
+ "attack": (attack_parser, attack_main),
+ "clean_data": (clean_data_parser, clean_data_main),
+ "compile": (compile_parser, compile_main),
+ "data": (data_parser, data_main),
+ "experiment": (experiment_parser, experiment_main),
+ "find_best": (find_best_parser, find_best_main),
+ "generate_grid": (generate_grid_parser, generate_grid_main),
+ "hydra_test": (None, hydra_test_main),
+ "merge": (merge_parser, merge_main),
+ "optimise": (None, optimise_main),
+ "parse": (hydra_parser, parse_hydra_config),
+ "plots": (plots_parser, plots_main),
+ "prepare_queue": (None, prepare_queue_main),
+ "query_kepler": (kepler_parser, kepler_main),
+}
+# Import-time sanity check. It compares lengths only, so it does not verify
+# that the names in `layer_list` actually match the keys of the dictionary.
+assert len(deckard_layer_dict) == len(
+ layer_list,
+), "Some layers are missing from the deckard_layer_dict"
-def parse_and_repro(args, default_config="default.yaml", config_dir="conf"):
- if len(args) == 0:
- assert (
- save_params_file(
- config_dir=(
- Path(Path(), config_dir)
- if not Path(config_dir).is_absolute()
- else Path(config_dir)
- ),
- config_file=default_config,
- )
- is None
- )
- assert Path(Path(), "params.yaml").exists()
- else:
- cmd = f"python -m deckard.layers.parse {args} --config_file {default_config}"
- # error = f"error parsing command: {cmd} {args}"
- with subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) as proc:
- for line in proc.stdout:
- print(line.rstrip().decode("utf-8"))
- if Path(Path(), "dvc.yaml").exists():
- cmd = "dvc repro"
- with subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) as proc:
- for line in proc.stdout:
- print(line.rstrip().decode("utf-8"))
-
- else:
- raise ValueError("No dvc.yaml file found. Please construct a pipeline.")
- return 0
+def main(layer, args):
+    """Run a single deckard layer.
+
+    Args:
+        layer: key of ``deckard_layer_dict`` naming the layer to run.
+        args: the layer's own command-line arguments, either as a list of
+            strings (without the program name and without the layer name) or
+            as an object exposing that list as ``.args``.
+
+    Raises:
+        ValueError: if ``layer`` is not a key of ``deckard_layer_dict``.
+    """
+    if layer not in deckard_layer_dict:
+        raise ValueError(f"Layer {layer} not found.")
+    parser, sub_main = deckard_layer_dict[layer]
+    # A plain argv-style list has no ``.args`` attribute; accept both shapes.
+    layer_argv = list(getattr(args, "args", args))
+    if parser is None:
+        # Layers registered without a parser cannot be given a namespace.
+        # NOTE(review): assumes such entry points take no arguments and read
+        # sys.argv themselves (hydra_test_main does); confirm for
+        # optimise_main and prepare_queue_main.
+        sub_main()
+        return
+    sub_main(parser.parse_args(layer_argv))
if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- parser = argparse.ArgumentParser()
- parser.add_argument(
- "--submodule",
- type=str,
- help=f"Submodule to run. Choices: {layer_list}",
- )
- parser.add_argument(
- "--config_file",
- type=str,
- help="default hydra configuration file that you would like to reproduce with dvc repro.",
- )
- parser.add_argument("--config_dir", type=str, default="conf")
- parser.add_argument("other_args", type=str, nargs="*")
- args = parser.parse_args()
- submodule = args.submodule
- if submodule is not None:
- assert (
- args.config_file is None
- ), "config_file and submodule cannot be specified at the same time"
- if submodule not in layer_list and submodule is not None:
- raise ValueError(f"Submodule {submodule} not found. Choices: {layer_list}")
- if len(args.other_args) > 0:
- other_args = " ".join(args.other_args)
- else:
- other_args = []
- if submodule is None:
- assert (
- parse_and_repro(other_args, args.config_file, config_dir=args.config_dir)
- == 0
- )
- else:
- assert run_submodule(submodule, other_args) == 0
+    # sys.argv[0] is the program name; sys.argv[1] selects the layer.
+    if len(sys.argv) < 2:
+        raise SystemExit(
+            f"Usage: python -m deckard <layer> [layer arguments]. Choices: {layer_list}",
+        )
+    # Pop (rather than slice) the layer name so that layers which read
+    # sys.argv themselves never see it.
+    layer = sys.argv.pop(1)
+    # Hand over only the layer's own arguments, without the program name.
+    main(layer, sys.argv[1:])
diff --git a/deckard/layers/afr.py b/deckard/layers/afr.py
index 41c7c4dc..c69e7887 100644
--- a/deckard/layers/afr.py
+++ b/deckard/layers/afr.py
@@ -28,6 +28,14 @@
logger = logging.getLogger(__name__)
+__all__ = [
+ "afr_main",
+ "survival_probability_calibration",
+ "fit_aft",
+ "plot_aft",
+ "afr_parser",
+]
+
# Modified from https://github.com/CamDavidsonPilon/lifelines/blob/master/lifelines/calibration.py
def survival_probability_calibration(
@@ -872,7 +880,7 @@ def calculate_raw_failures(args, data, config):
return data
-def main(args):
+def afr_main(args):
target = args.target
duration_col = args.duration_col
dataset = args.dataset
@@ -929,4 +937,4 @@ def main(args):
afr_parser.add_argument("--config_file", type=str, default="afr.yaml")
afr_parser.add_argument("--plots_folder", type=str, default="plots")
args = afr_parser.parse_args()
- main(args)
+ afr_main(args)
diff --git a/deckard/layers/clean_data.py b/deckard/layers/clean_data.py
index 9fdd30d9..615a563b 100644
--- a/deckard/layers/clean_data.py
+++ b/deckard/layers/clean_data.py
@@ -478,7 +478,9 @@ def replace_strings_in_data(data, replace_dict):
v,
dict,
), f"Value for key {k} in replace_dict is not a dictionary."
- assert k in data.columns, f"Key {k} not in data.columns."
+ if k not in data.columns:
+ logger.warning(f"Column {k} not in data. Ignoring.")
+ continue
for k1, v1 in v.items():
logger.info(f"Replacing {k1} with {v1} in {k}...")
k1 = str(k1)
@@ -610,41 +612,41 @@ def drop_values(data, drop_dict):
return data
-parser = argparse.ArgumentParser()
-parser.add_argument(
+clean_data_parser = argparse.ArgumentParser()
+clean_data_parser.add_argument(
"-i",
"--input_file",
type=str,
help="Data file to read from",
required=True,
)
-parser.add_argument(
+clean_data_parser.add_argument(
"-o",
"--output_file",
type=str,
help="Data file to read from",
required=True,
)
-parser.add_argument(
+clean_data_parser.add_argument(
"-v",
"--verbosity",
default="INFO",
help="Increase output verbosity",
)
-parser.add_argument(
+clean_data_parser.add_argument(
"-c",
"--config",
help="Path to the config file",
default="clean.yaml",
)
-parser.add_argument(
+clean_data_parser.add_argument(
"-s",
"--subset",
help="Subset of data you would like to plot",
default=None,
nargs="?",
)
-parser.add_argument(
+clean_data_parser.add_argument(
"-d",
"--drop_if_empty",
help="Drop row if this columns is empty",
@@ -656,14 +658,14 @@ def drop_values(data, drop_dict):
"predict_time",
],
)
-parser.add_argument(
+clean_data_parser.add_argument(
"--pareto_dict",
help="Path to (optional) pareto set dictionary.",
default=None,
)
-def main(args):
+def clean_data_main(args):
logging.basicConfig(level=args.verbosity)
assert Path(
args.input_file,
@@ -726,5 +728,5 @@ def main(args):
if __name__ == "__main__":
- args = parser.parse_args()
- main(args)
+ args = clean_data_parser.parse_args()
+ clean_data_main(args)
diff --git a/deckard/layers/compile.py b/deckard/layers/compile.py
index 4a33e818..28a33a56 100644
--- a/deckard/layers/compile.py
+++ b/deckard/layers/compile.py
@@ -4,6 +4,7 @@
import logging
from tqdm import tqdm
import yaml
+import argparse
logger = logging.getLogger(__name__)
@@ -172,13 +173,13 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
Path(results_folder).mkdir(exist_ok=True, parents=True)
suffix = results_file.suffix
if suffix == ".csv":
- results = pd.read_csv(results_file)
+ results = pd.read_csv(results_file, index_col=0)
elif suffix == ".xlsx":
- results = pd.read_excel(results_file)
+ results = pd.read_excel(results_file, index_col=0)
elif suffix == ".html":
- results = pd.read_html(results_file)
+ results = pd.read_html(results_file, index_col=0)
elif suffix == ".json":
- results = pd.read_json(results_file)
+ results = pd.read_json(results_file, index_col=0)
elif suffix == ".tex":
pd.read_csv(
results_file,
@@ -187,6 +188,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
skiprows=4,
skipfooter=3,
engine="python",
+ index_col=0,
)
else:
raise ValueError(f"File type {suffix} not supported.")
@@ -196,16 +198,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
return results
-if __name__ == "__main__":
- import argparse
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--results_file", type=str, default="results.csv")
- parser.add_argument("--report_folder", type=str, default="reports", required=True)
- parser.add_argument("--results_folder", type=str, default=".")
- parser.add_argument("--exclude", type=list, default=None, nargs="*")
- parser.add_argument("--verbose", type=str, default="INFO")
- args = parser.parse_args()
+def compile_main(parse_results, save_results, args):
logging.basicConfig(level=args.verbose)
report_folder = args.report_folder
results_file = args.results_file
@@ -215,3 +208,20 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
assert Path(
report_file,
).exists(), f"Results file {report_file} does not exist. Something went wrong."
+
+
+# Command-line interface of the compile layer.
+compile_parser = argparse.ArgumentParser()
+compile_parser.add_argument("--results_file", type=str, default="results.csv")
+# Required option: a default would never be used, so none is declared.
+compile_parser.add_argument(
+    "--report_folder",
+    type=str,
+    required=True,
+)
+compile_parser.add_argument("--results_folder", type=str, default=".")
+# nargs="*" already gathers the values into a list. Using type=list would
+# additionally split every value into its single characters.
+compile_parser.add_argument("--exclude", type=str, default=None, nargs="*")
+compile_parser.add_argument("--verbose", type=str, default="INFO")
+
+if __name__ == "__main__":
+ args = compile_parser.parse_args()
+ compile_main(parse_results, save_results, args)
diff --git a/deckard/layers/deploy.py b/deckard/layers/deploy.py
deleted file mode 100644
index a1fe99ed..00000000
--- a/deckard/layers/deploy.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import logging
-import argparse
-from pathlib import Path
-import yaml
-from ..iaac import GCP_Config
-
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
-if __name__ == "__main__":
- iaac_parser = argparse.ArgumentParser()
- iaac_parser.add_argument("--verbosity", type=str, default="INFO")
- iaac_parser.add_argument("--config_dir", type=str, default="conf/deploy")
- iaac_parser.add_argument("--config_file", type=str, default="default.yaml")
- iaac_parser.add_argument("--workdir", type=str, default=".")
- args = iaac_parser.parse_args()
- config_dir = Path(args.workdir, args.config_dir).resolve().as_posix()
- config_file = Path(config_dir, args.config_file).resolve().as_posix()
- with open(config_file, "r") as f:
- params = yaml.load(f, Loader=yaml.FullLoader)
- gcp = GCP_Config(**params)
- logging.basicConfig(level=args.verbosity)
- assert gcp() is None, "Error creating cluster"
diff --git a/deckard/layers/find_best.py b/deckard/layers/find_best.py
index 9cb34315..7cebd456 100644
--- a/deckard/layers/find_best.py
+++ b/deckard/layers/find_best.py
@@ -25,6 +25,19 @@ def find_optuna_best(
):
logger.info(f"Study name: {study_name}")
logger.info(f"Storage name: {storage_name}")
+ # Validate the directions
+ if isinstance(direction, str):
+ directions = [direction]
+ else:
+ assert isinstance(
+ directions,
+ list,
+ ), f"Directions is not a list: {type(directions)}"
+ for direction in directions:
+ assert direction in [
+ "minimize",
+ "maximize",
+ ], f"Direction {direction} not recognized."
if isinstance(direction, str):
study = optuna.create_study(
study_name=study_name,
@@ -41,9 +54,67 @@ def find_optuna_best(
directions=direction,
)
directions = direction
- assert isinstance(directions, list), f"Directions is not a list: {type(directions)}"
+ # Convert directions to bools
+ directions = [False if x == "maximize" else True for x in directions]
+ # Get the trials dataframe
df = study.trials_dataframe(attrs=("number", "value", "params"))
# Find the average of each value over the columns in average_over
+ # df = group_by_params(df)
+ if study_csv is not None:
+ Path(study_csv).parent.mkdir(parents=True, exist_ok=True)
+ df.to_csv(study_csv)
+ # To dotlist
+ params = merge_best_with_default(
+ config_folder,
+ default_config,
+ config_subdir,
+ study,
+ )
+ if params_file is not None:
+ params_file = create_new_config_in_subdir(
+ params_file,
+ config_folder,
+ default_config,
+ config_subdir,
+ params,
+ )
+ return params
+
+
+def merge_best_with_default(
+    config_folder,
+    default_config,
+    config_subdir,
+    study,
+    use_optuna_best=True,
+):
+    """Override the default config with the values of the study's best trial.
+
+    The best trial's params, user attributes and system attributes are each
+    flattened, merged with OmegaConf (later sources win), flattened again,
+    turned into overrides by ``get_overrides`` and applied through
+    ``override_default_with_best``, whose result is returned.
+
+    Raises:
+        NotImplementedError: if ``use_optuna_best`` is anything but ``True``.
+    """
+    # Guard clause: only the optuna "best trial" strategy exists so far.
+    if use_optuna_best is not True:
+        raise NotImplementedError("Not implemented yet.")
+    trial_params = flatten_dict(study.best_params)
+    user_attrs = flatten_dict(study.best_trial.user_attrs)
+    system_attrs = flatten_dict(study.best_trial.system_attrs)
+    logger.debug(f"Best params: {trial_params}")
+    logger.debug(f"Best user params: {user_attrs}")
+    logger.debug(f"Best system params: {system_attrs}")
+    # Merge the three sources, then go back to a flat (dotlist-style) mapping.
+    merged = OmegaConf.merge(trial_params, user_attrs, system_attrs)
+    flat_best = flatten_dict(OmegaConf.to_container(merged, resolve=False))
+    return override_default_with_best(
+        config_folder,
+        default_config,
+        get_overrides(config_subdir, flat_best),
+        config_subdir=config_subdir,
+    )
+
+
+def group_by_params(df):
not_these = ["number", "value"]
val_cols = [
col
@@ -51,11 +122,9 @@ def find_optuna_best(
if col.startswith("values_") and col.split("values_")[-1] not in not_these
]
not_these.extend(val_cols)
- print(f"Not these: {not_these}")
groupby_cols = [
col for col in df.columns if col.split("params_")[-1] not in not_these
]
- print(f"Groupby cols: {groupby_cols}")
dfs = df.groupby(groupby_cols)
new_df = pd.DataFrame(columns=groupby_cols + ["mean", "std", "ntrials", "nuniques"])
means = []
@@ -82,30 +151,11 @@ def find_optuna_best(
new_df["std"] = stds
new_df["ntrials"] = ntrials
new_df["nuniques"] = nuniques
- for direction in directions:
- assert direction in [
- "minimize",
- "maximize",
- ], f"Direction {direction} not recognized."
- directions = [False if x == "maximize" else True for x in directions]
- assert isinstance(new_df, pd.DataFrame), f"df is not a dataframe: {type(df)}"
- if study_csv is not None:
- Path(study_csv).parent.mkdir(parents=True, exist_ok=True)
- df.to_csv(study_csv)
- # To dotlist
- best_params = flatten_dict(study.best_params)
- more_params = flatten_dict(study.best_trial.user_attrs)
- even_more_params = flatten_dict(study.best_trial.system_attrs)
- logger.debug(f"Best params: {best_params}")
- logger.debug(f"Best user params: {more_params}")
- logger.debug(f"Best system params: {even_more_params}")
- # Merge all the params
- best_params = OmegaConf.to_container(
- OmegaConf.merge(best_params, more_params, even_more_params),
- resolve=False,
- )
- # to dotlist
- best_params = flatten_dict(best_params)
+ assert isinstance(new_df, pd.DataFrame), f"df is not a dataframe: {type(new_df)}"
+ return new_df
+
+
+def get_overrides(config_subdir, best_params):
overrides = []
# Changing the keys to hydra override format
for key, value in best_params.items():
@@ -130,21 +180,7 @@ def find_optuna_best(
logger.info(f"Adding {key} to param list")
else:
logger.debug(f"Skipping {key} because it is not in {config_subdir}")
- params = override_default_with_best(
- config_folder,
- default_config,
- overrides,
- config_subdir=config_subdir,
- )
- if params_file is not None:
- params_file = create_new_config_in_subdir(
- params_file,
- config_folder,
- default_config,
- config_subdir,
- params,
- )
- return params
+ return overrides
def create_new_config_in_subdir(
@@ -176,7 +212,6 @@ def create_new_config_in_subdir(
with open(params_file.with_suffix(".yaml"), "w") as f:
yaml.dump(params, f)
assert params_file.exists(), f"{params_file.resolve().as_posix()} does not exist."
-
return params_file
@@ -195,27 +230,25 @@ def override_default_with_best(
return cfg
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--params_file", type=str, default=True)
-
- parser.add_argument("--study_csv", type=str, default=None)
- parser.add_argument("--config_folder", type=str, default=Path(Path(), "conf"))
- parser.add_argument("--default_config", type=str, default="default")
- parser.add_argument("--config_subdir", type=str, default=None)
- parser.add_argument("--study_name", type=str, required=True)
- parser.add_argument("--config_name", type=str)
- parser.add_argument("--verbosity", type=str, default="INFO")
- parser.add_argument("--storage_name", type=str, required=True)
- parser.add_argument("--direction", type=str, default="maximize")
- parser.add_argument("--study_type", type=str, default="optuna")
- args = parser.parse_args()
+def _build_find_best_parser():
+    """Create the argument parser of the find_best layer."""
+    parser = argparse.ArgumentParser()
+    add = parser.add_argument
+    # Output locations.
+    add("--params_file", type=str, default=True)
+    add("--study_csv", type=str, default=None)
+    # Where the hydra configuration lives.
+    add("--config_folder", type=str, default=Path(Path(), "conf"))
+    add("--default_config", type=str, default="default")
+    add("--config_subdir", type=str, default=None)
+    # Which study to read and how to interpret it.
+    add("--study_name", type=str, required=True)
+    add("--config_name", type=str)
+    add("--verbosity", type=str, default="INFO")
+    add("--storage_name", type=str, required=True)
+    add("--direction", type=str, default="maximize")
+    add("--study_type", type=str, default="optuna")
+    return parser
+
+
+find_best_parser = _build_find_best_parser()
+
+
+def find_best_main(find_optuna_best, args):
args.config_folder = Path(args.config_folder).resolve().as_posix()
logging
if args.study_type == "optuna":
- study_name = args.study_name
- storage_name = args.storage_name
direction = args.direction
if len(direction) == 1:
direction = direction[0]
@@ -231,3 +264,8 @@ def override_default_with_best(
)
else:
raise NotImplementedError(f"Study type {args.study_type} not implemented.")
+
+
+if __name__ == "__main__":
+ args = find_best_parser.parse_args()
+ find_best_main(find_optuna_best, args)
diff --git a/deckard/layers/generate_grid.py b/deckard/layers/generate_grid.py
index 487ce801..66c9628f 100644
--- a/deckard/layers/generate_grid.py
+++ b/deckard/layers/generate_grid.py
@@ -4,6 +4,7 @@
import yaml
from functools import reduce
from operator import mul
+import argparse
from ..base.utils import make_grid, my_hash
logger = logging.getLogger(__name__)
@@ -74,13 +75,13 @@ def generate_grid_from_folders(conf_dir, regex):
return big_list
-def generate_queue(
- conf_root,
- grid_dir,
- regex,
- queue_folder="queue",
- default_file="default.yaml",
-):
+def generate_grid_main(args):
+ conf_root = args.conf_root
+ grid_dir = args.grid_folder
+ regex = args.regex
+ queue_folder = args.queue_folder
+ default_file = args.default_file
+ output_file = args.output_file
this_dir = os.getcwd()
conf_dir = os.path.join(this_dir, conf_root, grid_dir)
logger.debug(f"Looking for configs in {conf_dir}")
@@ -102,12 +103,51 @@ def generate_queue(
yaml.dump(big_list[i], outfile, default_flow_style=False)
assert Path(path, name + ".yaml").exists()
i += 1
+ if output_file is not None:
+ with open(output_file, "w") as outfile:
+ yaml.dump(big_list, outfile, default_flow_style=False)
+ assert Path(output_file).exists()
return big_list
-conf_root = "conf"
-grid_folder = "grid"
-regex = "*.yaml"
-
-big_list = generate_queue(conf_root, grid_folder, regex)
-print(yaml.dump(big_list[0]))
+# Command-line interface of the generate_grid layer. Every option is an
+# optional string, so the options are declared as (flag, default, help) rows.
+generate_grid_parser = argparse.ArgumentParser()
+for _flag, _default, _help in (
+    ("--conf_root", "conf", "Root directory for config files"),
+    ("--grid_folder", "grid", "Folder containing config files"),
+    ("--regex", "*.yaml", "Regex for finding config files"),
+    ("--queue_folder", "queue", "Folder for queue files"),
+    ("--default_file", "default.yaml", "Default config file"),
+    ("--output_file", None, "Output file for grid"),
+):
+    generate_grid_parser.add_argument(
+        _flag,
+        type=str,
+        default=_default,
+        help=_help,
+    )
+# Do not leave the loop variables behind in the module namespace.
+del _flag, _default, _help
+
+if __name__ == "__main__":
+ args = generate_grid_parser.parse_args()
+ generate_grid_main(args)
diff --git a/deckard/layers/generate_webpage.py b/deckard/layers/generate_webpage.py
deleted file mode 100644
index bd2699c7..00000000
--- a/deckard/layers/generate_webpage.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import os
-import csv
-from bs4 import BeautifulSoup
-
-
-def generate_html_file(csv_file_path, output_folder):
- # Read the CSV file
- with open(csv_file_path, "r") as file:
- reader = csv.reader(file)
- data = list(reader)
-
- # Get the title of the CSV file
- file_name = os.path.basename(csv_file_path)
- title = os.path.splitext(file_name)[0]
-
- # Create an HTML file path and open the file
- html_file_path = os.path.join(output_folder, f"{title}.html")
- with open(html_file_path, "w") as html_file:
- # Create a BeautifulSoup object
- soup = BeautifulSoup("", "html.parser")
-
- # Add the title to the HTML file
- soup.append(BeautifulSoup(f"
{title} ", "html.parser"))
-
- # Create an HTML table from the CSV data
- table_html = ""
- for row in data:
- table_html += ""
- for cell in row:
- # Check if the cell is a string representing a valid path
- if isinstance(cell, str) and os.path.exists(cell):
- # Create a hyperlink with the capitalized name of the file
- file_name = os.path.basename(cell)
- link_title = os.path.splitext(file_name)[0]
- cell = f'{link_title.capitalize()} '
-
- table_html += f"{cell} "
- table_html += " "
- table_html += "
"
-
- # Add the table to the HTML file
- soup.append(BeautifulSoup(table_html, "html.parser"))
-
- # Write the HTML content to the file
- html_file.write(soup.prettify())
-
-
-def parse_folder(folder_path):
- # Create the output folder if it doesn't exist
- os.makedirs(folder_path, exist_ok=True)
-
- # Iterate over the CSV files in the folder
- for file_name in os.listdir(folder_path):
- if file_name.endswith(".csv"):
- csv_file_path = os.path.join(folder_path, file_name)
- generate_html_file(csv_file_path, folder_path)
-
-
-# Define the folder path containing CSV files
-folder_path = "output/reports" # Update with your folder path
-
-# Parse the folder and generate HTML files
-parse_folder(folder_path)
diff --git a/deckard/layers/hydra_test.py b/deckard/layers/hydra_test.py
index b21fc076..21db541a 100644
--- a/deckard/layers/hydra_test.py
+++ b/deckard/layers/hydra_test.py
@@ -1,17 +1,55 @@
from omegaconf import DictConfig, OmegaConf
from pathlib import Path
+import sys
import hydra
-import os
-working_dir = os.getcwd()
-config_path = Path(working_dir, "conf").as_posix()
+working_dir = Path().cwd()
+config_dir = "conf"
+config_path = Path(working_dir, config_dir).as_posix()
+config_file = "default"
-@hydra.main(version_base=None, config_path=config_path, config_name="default")
-def my_app(cfg: DictConfig) -> None:
- print(OmegaConf.to_yaml(cfg))
- return 0
+def hydra_test_main():
+    """Compose the hydra configuration and print it as YAML.
+
+    The options ``--working_dir``, ``--config_dir``, ``--config_file`` and
+    ``--version_base`` are read from ``sys.argv`` and removed from it, so
+    that hydra only sees what is left when it parses the command line.
+
+    Raises:
+        ValueError: if one of the options above is given without a value.
+    """
+    # Alias on purpose: consumed options must disappear from sys.argv itself.
+    args = sys.argv
+
+    def _pop_option(flag, default):
+        """Remove ``flag`` and its value from ``args``; return the value."""
+        if flag not in args:
+            return default
+        position = args.index(flag)
+        if position + 1 >= len(args):
+            raise ValueError(f"{flag} requires a value")
+        value = args[position + 1]
+        # Delete by position: looking the value up again could hit an
+        # identical string elsewhere on the command line.
+        del args[position : position + 2]
+        return value
+
+    working_dir = _pop_option("--working_dir", Path().cwd())
+    config_dir = _pop_option("--config_dir", "conf")
+    config_file = _pop_option("--config_file", "default")
+    version_base = _pop_option("--version_base", "1.3")
+    # Build the path from the parsed values rather than the module defaults.
+    config_path = Path(working_dir, config_dir).as_posix()
+
+    @hydra.main(
+        version_base=version_base,
+        config_path=config_path,
+        config_name=config_file,
+    )
+    def hydra_main(cfg: DictConfig) -> None:
+        print(OmegaConf.to_yaml(cfg))
+        return 0
+
+    # The decorated function has to be called for hydra to run at all.
+    hydra_main()
if __name__ == "__main__":
- my_app()
+ hydra_test_main()
diff --git a/deckard/layers/merge.py b/deckard/layers/merge.py
index 13d62ea1..991b554d 100644
--- a/deckard/layers/merge.py
+++ b/deckard/layers/merge.py
@@ -9,7 +9,7 @@
logger = logging.getLogger(__name__)
-__all__ = ["merge_csv", "main", "parser"]
+__all__ = ["merge_csv", "merge_main", "merge_parser"]
def merge_csv(
@@ -129,7 +129,7 @@ def parse_cleaning_config(config_file, metadata_file=None, subset_metadata_file=
return dict_
-def main(args):
+def merge_main(args):
config = parse_cleaning_config(args.config, args.metadata, args.subset_metadata)
if args.output_folder is None:
args.output_folder = Path().cwd()
@@ -199,33 +199,33 @@ def add_subset_metadata(df, metadata_list=[]):
return df
-parser = argparse.ArgumentParser()
-parser.add_argument(
+merge_parser = argparse.ArgumentParser()
+merge_parser.add_argument(
"--output_file",
type=str,
help="Name of the output file",
default="merged.csv",
)
-parser.add_argument(
+merge_parser.add_argument(
"--output_folder",
type=str,
help="Name of the output folder",
required=False,
)
-parser.add_argument(
+merge_parser.add_argument(
"--smaller_file",
type=str,
help="Name(s) of the files to merge into the big file.",
required=False,
nargs="*",
)
-parser.add_argument(
+merge_parser.add_argument(
"--config",
type=str,
help="Name of file containing a 'fillna' config dictionary.",
required=False,
)
-parser.add_argument(
+merge_parser.add_argument(
"--metadata",
type=str,
help="Name of file containing a 'metadata' dictionary.",
@@ -233,14 +233,14 @@ def add_subset_metadata(df, metadata_list=[]):
# set default to --config
default=None,
)
-parser.add_argument(
+merge_parser.add_argument(
"--subset_metadata",
type=str,
help="Name of file containing a 'subset_metadata' dictionary.",
required=False,
default=None,
)
-parser.add_argument(
+merge_parser.add_argument(
"--how",
type=str,
help="Type of merge to perform. Default is 'outer'.",
@@ -248,5 +248,5 @@ def add_subset_metadata(df, metadata_list=[]):
)
if __name__ == "__main__":
- args = parser.parse_args()
- main(args)
+ args = merge_parser.parse_args()
+ merge_main(args)
diff --git a/deckard/layers/optimise.py b/deckard/layers/optimise.py
index 9f96bd9c..9c6bfdf9 100644
--- a/deckard/layers/optimise.py
+++ b/deckard/layers/optimise.py
@@ -188,7 +188,7 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict:
key_list.extend(new_keys)
else:
- raise TypeError(f"Expected str or dict, got {type(params)}")
+ raise TypeError(f"Expected dict, got {type(params)}")
params = read_subset_of_params(key_list, params)
# Load files from dvc
with open(Path(path, "dvc.yaml"), "r") as f:
@@ -215,7 +215,7 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict:
if "metrics" in pipe:
metric_list = [str(x).split(":")[0] for x in pipe["metrics"]]
file_list.extend(metric_list)
- file_string = str(file_list)
+ file_string = str(file_list).replace("item.", "")
files = params["files"]
file_list = list(files.keys())
for key in file_list:
@@ -324,8 +324,8 @@ def optimise(cfg: DictConfig) -> None:
logger = logging.getLogger(__name__)
@hydra.main(config_path=config_path, config_name=config_name, version_base="1.3")
- def hydra_optimise(cfg: DictConfig) -> float:
+ def optimise_main(cfg: DictConfig) -> float:
score = optimise(cfg)
return score
- hydra_optimise()
+ optimise_main()
diff --git a/deckard/layers/parse.py b/deckard/layers/parse.py
index 44a2200b..3a4eec4e 100644
--- a/deckard/layers/parse.py
+++ b/deckard/layers/parse.py
@@ -5,6 +5,8 @@
from omegaconf import OmegaConf
from .utils import save_params_file
+__all__ = ["parse_hydra_config", "hydra_parser"]
+
logger = logging.getLogger(__name__)
hydra_parser = argparse.ArgumentParser()
hydra_parser.add_argument("overrides", type=str, nargs="*", default=None)
diff --git a/deckard/layers/plots.py b/deckard/layers/plots.py
index af653714..6e37ce7f 100644
--- a/deckard/layers/plots.py
+++ b/deckard/layers/plots.py
@@ -5,6 +5,7 @@
import seaborn as sns
import yaml
from pathlib import Path
+import numpy as np
logger = logging.getLogger(__name__)
sns.set_theme(style="whitegrid", font_scale=1.8, font="times new roman")
@@ -35,14 +36,18 @@ def cat_plot(
folder,
xlabels=None,
ylabels=None,
+ xticklabels=None,
+ yticklabels=None,
titles=None,
legend_title=None,
x_lim=None,
y_lim=None,
hue_order=None,
rotation=0,
- set={},
filetype=".eps",
+ x_scale=None,
+ y_scale=None,
+ digitize=[],
**kwargs,
):
"""
@@ -88,12 +93,16 @@ def cat_plot(
"""
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
+ # clear the Axes object
suffix = Path(file).suffix
if suffix is not None:
file = Path(file)
else:
file = Path(file).with_suffix(filetype)
logger.info(f"Rendering graph {file}")
+ data = digitize_cols(data, digitize)
if hue is not None:
data = data.sort_values(by=[hue, x, y])
logger.debug(
@@ -112,12 +121,31 @@ def cat_plot(
data = data.sort_values(by=[x, y])
logger.debug(f"Data sorted by x:{x}, y:{y}, kind:{kind}, and kwargs:{kwargs}.")
graph = sns.catplot(data=data, x=x, y=y, kind=kind, **kwargs)
- if xlabels is not None:
- graph.set_xlabels(xlabels)
- if ylabels is not None:
- graph.set_ylabels(ylabels)
+ # graph is a FacetGrid object and we need to set the x,y scales, labels, titles on the axes
+ for graph_ in graph.axes.flat:
+ if y_scale is not None:
+ graph_.set_yscale(y_scale)
+ if x_scale is not None:
+ graph_.set_xscale(x_scale)
+ if xticklabels is not None:
+ graph_.set_xticklabels(xticklabels)
+ if yticklabels is not None:
+ graph_.set_yticklabels(yticklabels)
if titles is not None:
- graph.set_titles(titles)
+ if isinstance(titles, dict):
+ graph.set_titles(**titles)
+ elif isinstance(titles, str):
+ graph.set_titles(titles)
+ else:
+ try:
+ graph.set_titles("{row_name} | {col_name}")
+ except KeyError as e:
+ if "row_name" in str(e):
+ graph.set_titles("{col_name}")
+ elif "col_name" in str(e):
+ graph.set_titles("{row_name}")
+ else:
+ raise e
if legend_title is not None:
graph.legend.set_title(title=legend_title)
else:
@@ -125,8 +153,11 @@ def cat_plot(
graph.legend.remove()
else:
pass
+ if xlabels is not None:
+ graph.set_xlabels(xlabels)
+ if ylabels is not None:
+ graph.set_ylabels(ylabels)
graph.set_xticklabels(graph.axes.flat[-1].get_xticklabels(), rotation=rotation)
- graph.set(**set)
if x_lim is not None:
graph.set(xlim=x_lim)
if y_lim is not None:
@@ -134,9 +165,29 @@ def cat_plot(
graph.tight_layout()
graph.savefig(folder / file)
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
logger.info(f"Saved graph to {folder / file}")
+def digitize_cols(data, digitize):
+    """Bin each column named in ``digitize`` into NUMBER_OF_BINS levels and return the result.
+    ``digitize`` is a column name or a list of names. A copy is binned, never the caller's
+    frame: plots_main reuses one frame, so a later call would bin the binned values again.
+    """
+    if isinstance(digitize, str):
+        digitize = [digitize]
+    assert isinstance(digitize, list), "digitize must be a list of columns to digitize"
+    if not digitize:
+        return data
+    NUMBER_OF_BINS = 10
+    data = data.copy()
+    for col in digitize:
+        bins = np.linspace(data[col].min(), data[col].max(), NUMBER_OF_BINS)
+        data[col] = np.digitize(data[col], bins) / NUMBER_OF_BINS
+    return data
+
+
def line_plot(
data,
x,
@@ -193,6 +244,8 @@ def line_plot(
the line plot graph object.
"""
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
suffix = Path(file).suffix
if suffix is not None:
file = Path(file)
@@ -223,6 +276,8 @@ def line_plot(
graph.get_figure().savefig(folder / file)
logger.info(f"Saved graph to {folder/file}")
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
return graph
@@ -285,6 +340,8 @@ def scatter_plot(
"""
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
suffix = Path(file).suffix
if suffix is not None:
file = Path(file)
@@ -320,38 +377,40 @@ def scatter_plot(
logger.info(f"Saved graph to {Path(folder) / file}")
plt.gcf().clear()
+ plt.cla()
+ plt.clf()
return graph
-parser = argparse.ArgumentParser()
-parser.add_argument(
+plots_parser = argparse.ArgumentParser()
+plots_parser.add_argument(
"-p",
"--path",
type=str,
help="Path to the plot folder",
required=True,
)
-parser.add_argument(
+plots_parser.add_argument(
"-f",
"--file",
type=str,
help="Data file to read from",
required=True,
)
-parser.add_argument(
+plots_parser.add_argument(
"-t",
"--plotfiletype",
type=str,
help="Filetype of the plots",
default=".eps",
)
-parser.add_argument(
+plots_parser.add_argument(
"-v",
"--verbosity",
default="INFO",
help="Increase output verbosity",
)
-parser.add_argument(
+plots_parser.add_argument(
"-c",
"--config",
help="Path to the config file",
@@ -359,7 +418,7 @@ def scatter_plot(
)
-def main(args):
+def plots_main(args):
logging.basicConfig(level=args.verbosity)
assert Path(
args.file,
@@ -390,20 +449,19 @@ def main(args):
logger.info(f"Creating folder {FOLDER}")
FOLDER.mkdir(parents=True, exist_ok=True)
- cat_plot_list = big_dict.get("cat_plot", [])
- for dict_ in cat_plot_list:
- cat_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE)
-
line_plot_list = big_dict.get("line_plot", [])
for dict_ in line_plot_list:
line_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE)
- scatter_plot_list = big_dict.get("scatter_plot", [])
scatter_plot_list = big_dict.get("scatter_plot", [])
for dict_ in scatter_plot_list:
scatter_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE)
+ cat_plot_list = big_dict.get("cat_plot", [])
+ for dict_ in cat_plot_list:
+ cat_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE)
+
if __name__ == "__main__":
- args = parser.parse_args()
- main(args)
+ args = plots_parser.parse_args()
+ plots_main(args)
diff --git a/deckard/layers/prepare_queue.py b/deckard/layers/prepare_queue.py
index 6c4aeb94..ddec462d 100644
--- a/deckard/layers/prepare_queue.py
+++ b/deckard/layers/prepare_queue.py
@@ -1,6 +1,6 @@
import logging
-import os
from copy import deepcopy
+import sys
from pathlib import Path
import yaml
from hydra.utils import instantiate
@@ -273,27 +273,61 @@ def prepare_experiment_folder(cfg: DictConfig) -> None:
return exp, scorer, direction, folder, id_
-if __name__ == "__main__":
- logger = logging.getLogger(__name__)
- config_path = os.environ.pop(
- "DECKARD_CONFIG_PATH",
- str(Path(Path(), "conf").absolute().as_posix()),
- )
- config_name = os.environ.pop("DECKARD_DEFAULT_CONFIG", "default.yaml")
+def prepare_queue_main():
+ # Entry point: pull --working_dir, --config_dir, --config_file and --version_base out of sys.argv, then run the Hydra-decorated hydra_prepare with them.
+ args = sys.argv  # alias, not a copy: every pop below removes the item from sys.argv itself
+ global working_dir
+ if "--working_dir" in args:
+ working_dir = args[args.index("--working_dir") + 1]
+ # remove the flag, then the first list item equal to its value
+ args.pop(args.index("--working_dir"))
+ args.pop(args.index(working_dir))
+ else:
+ working_dir = Path(".").cwd()
+ print(working_dir)
+ if "--config_dir" in args:
+ config_dir = args[args.index("--config_dir") + 1]
+ # remove the flag, then the first list item equal to its value
+ args.pop(args.index("--config_dir"))
+ args.pop(args.index(config_dir))
+ else:
+ config_dir = "conf"
+ config_dir = Path(working_dir, config_dir).as_posix()
+ if "--config_file" in args:
+ config_file = args[args.index("--config_file") + 1]
+ # remove the flag, then the first list item equal to its value
+ args.pop(args.index("--config_file"))
+ args.pop(args.index(config_file))
+ else:
+ config_file = "default"
+ if "--version_base" in args:
+ version_base = args[args.index("--version_base") + 1]
+ # remove the flag, then the first list item equal to its value
+ args.pop(args.index("--version_base"))
+ args.pop(args.index(version_base))
+ else:
+ version_base = "1.3"
- @hydra.main(config_path=config_path, config_name=config_name, version_base="1.3")
+ @hydra.main(
+ config_path=config_dir,
+ config_name=config_file,
+ version_base=version_base,
+ )
def hydra_prepare(cfg: DictConfig) -> float:
exp, scorer, direction, folder, id_ = prepare_experiment_folder(cfg)
assert isinstance(exp, Experiment), f"Expected Experiment, got {type(exp)}."
assert isinstance(scorer, (str, list)), f"Expected list, got {type(scorer)}."
assert isinstance(direction, str), f"Expected str, got {type(direction)}."
- assert direction in [
- "minimize",
- "maximize",
- ], f"Expected 'minimize' or 'maximize', got {direction}."
+ assert len(scorer) == len(
+ direction,
+ ), "Length of scorer and direction must match."  # NOTE(review): direction is asserted to be a str just above, so len(direction) counts characters; confirm this check is what was intended
assert Path(
folder,
).exists(), f"Folder {folder} does not exist for experiment {id_}."
return 0
hydra_prepare()
+
+
+if __name__ == "__main__":
+ prepare_queue_main()
diff --git a/deckard/layers/query_kepler.py b/deckard/layers/query_kepler.py
index deb310b2..fe67fae3 100644
--- a/deckard/layers/query_kepler.py
+++ b/deckard/layers/query_kepler.py
@@ -1,8 +1,15 @@
-import logging
from datetime import datetime
-import pandas as pd
import argparse
-from prometheus_api_client import PrometheusConnect
+import logging
+import sys
+from dataclasses import dataclass
+import pandas as pd
+
+try:
+    from prometheus_api_client import PrometheusConnect
+except ImportError:
+    # A string argument makes sys.exit print it to stderr and exit with status 1.
+    sys.exit("Please install prometheus_api_client")
v100 = 250 / 3600
@@ -10,6 +17,7 @@
l4 = 72 / 3600
+@dataclass
class PromQuery:
def __init__(self):
self.prom_host = "34.147.65.220"
@@ -60,7 +68,15 @@ def caluculate_minutes(self):
return str(int(self.total / 60)) + "m"
-def run_query(input_file, output_file):
+def kepler_main(args):
+ input_file = args.input_file
+ output_file = args.output_file
+ logging.basicConfig(
+ level=args.verbosity,
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ logger = logging.getLogger(__name__)
+ logger.info("Quering the Prometheus for power metrics")
new_columns = [
"train_power",
"predict_power",
@@ -109,21 +125,12 @@ def run_query(input_file, output_file):
data.to_csv(output_file)
-if __name__ == "__main__":
- logger = logging.getLogger(__name__)
- dvc_parser = argparse.ArgumentParser()
- dvc_parser.add_argument("--input_file", type=str, default=None)
- dvc_parser.add_argument("--output_file", type=str, default=None)
- dvc_parser.add_argument("--verbosity", type=str, default="INFO")
-
- args = dvc_parser.parse_args()
- input_file = args.input_file
- output_file = args.output_file
+kepler_parser = argparse.ArgumentParser()
+kepler_parser.add_argument("--input_file", type=str, default=None)
+kepler_parser.add_argument("--output_file", type=str, default=None)
+kepler_parser.add_argument("--verbosity", type=str, default="INFO")
- logging.basicConfig(
- level=args.verbosity,
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
- )
- logger.info("Quering the Prometheus for power metrics")
- results = run_query(input_file=input_file, output_file=output_file)
+if __name__ == "__main__":
+ args = kepler_parser.parse_args()
+ results = kepler_main(args)
diff --git a/examples/classification/plots.ipynb b/examples/classification/plots.ipynb
deleted file mode 100644
index 1ef9111e..00000000
--- a/examples/classification/plots.ipynb
+++ /dev/null
@@ -1,252 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import seaborn as sns\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "\n",
- "\n",
- "# Load data\n",
- "df = pd.read_csv(\"output/attack.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "dict_keys(['attacks', 'defences', 'params'])\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_3723846/651469242.py:12: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " attack_results['Kernel'] = attack_results['model.init.kwargs.kernel']\n"
- ]
- }
- ],
- "source": [
- "from deckard.layers.compile import clean_data_for_plotting\n",
- "import yaml\n",
- "\n",
- "with open(\"conf/compile.yaml\", \"r\") as f:\n",
- " config = yaml.load(f, Loader=yaml.FullLoader)\n",
- "print(config.keys())\n",
- "def_gen_dict = config[\"defences\"]\n",
- "atk_gen_dict = config[\"attacks\"]\n",
- "control_dict = config[\"params\"]\n",
- "\n",
- "df = clean_data_for_plotting(df, def_gen_dict, atk_gen_dict, control_dict)\n",
- "attack_results = df.dropna(subset=[\"accuracy\", \"adv_accuracy\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "fig, ax = plt.subplots(2, 2)\n",
- "graph5 = sns.lineplot(\n",
- " x=\"attack.init.kwargs.eps\",\n",
- " y=\"accuracy\",\n",
- " data=attack_results,\n",
- " style=\"model.init.kwargs.kernel\",\n",
- " ax=ax[0, 0],\n",
- " legend=False,\n",
- " color=\"darkred\",\n",
- " style_order=[\"rbf\", \"poly\", \"linear\"],\n",
- ")\n",
- "graph5.set(xscale=\"log\", xlabel=\"Perturbation Distance\", ylabel=\"Accuracy\")\n",
- "graph6 = sns.lineplot(\n",
- " x=\"attack.init.kwargs.eps_step\",\n",
- " y=\"accuracy\",\n",
- " data=attack_results,\n",
- " style=\"model.init.kwargs.kernel\",\n",
- " ax=ax[0, 1],\n",
- " color=\"darkred\",\n",
- " style_order=[\"rbf\", \"poly\", \"linear\"],\n",
- ")\n",
- "graph6.set(xscale=\"log\", xlabel=\"Perturbation Step\", ylabel=\"Accuracy\")\n",
- "graph7 = sns.lineplot(\n",
- " x=\"attack.init.kwargs.max_iter\",\n",
- " y=\"accuracy\",\n",
- " data=attack_results,\n",
- " style=\"Kernel\",\n",
- " ax=ax[1, 0],\n",
- " legend=False,\n",
- " color=\"darkred\",\n",
- " style_order=[\"rbf\", \"poly\", \"linear\"],\n",
- ")\n",
- "graph7.set(xscale=\"log\", xlabel=\"Maximum Iterations\", ylabel=\"Accuracy\")\n",
- "graph8 = sns.lineplot(\n",
- " x=\"attack.init.kwargs.batch_size\",\n",
- " y=\"accuracy\",\n",
- " data=attack_results,\n",
- " style=\"Kernel\",\n",
- " ax=ax[1, 1],\n",
- " legend=False,\n",
- " color=\"darkred\",\n",
- " style_order=[\"rbf\", \"poly\", \"linear\"],\n",
- ")\n",
- "graph8.set(xscale=\"log\", xlabel=\"Batch Size\", ylabel=\"Accuracy\")\n",
- "graph6.legend(loc=\"center left\", bbox_to_anchor=(1, 0.5), ncol=1, title=\"Kernel\")\n",
- "fig.tight_layout()\n",
- "fig.savefig(\"plots/accuracy_vs_attack_parameters.pdf\")\n",
- "plt.gcf().clear()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "sns.lineplot(\n",
- " data=df,\n",
- " y=\"adv_fit_time\",\n",
- " x=\"attack.init.kwargs.eps\",\n",
- " hue=\"model.init.kwargs.kernel\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "sns.lineplot(\n",
- " data=df,\n",
- " y=\"adv_fit_time\",\n",
- " x=\"attack.init.kwargs.eps_step\",\n",
- " hue=\"model.init.kwargs.kernel\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "sns.lineplot(\n",
- " data=df,\n",
- " y=\"adv_fit_time\",\n",
- " x=\"attack.init.kwargs.batch_size\",\n",
- " hue=\"model.init.kwargs.kernel\",\n",
- ")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/gzip/.gitignore b/examples/gzip/.gitignore
index 67e77e0e..14be55ba 100644
--- a/examples/gzip/.gitignore
+++ b/examples/gzip/.gitignore
@@ -7,9 +7,11 @@ kdd_nsl
2-22/*
2-28/*
3-7/*
+7-29/*
gzip/*
ddos/*
kdd_nsl/*
sms_spam/*
truthseeker/*
conf/*/best_*.yaml
+/params.yaml
diff --git a/examples/gzip/batchMixin.py b/examples/gzip/batchMixin.py
index 5cc762b7..d21098a4 100644
--- a/examples/gzip/batchMixin.py
+++ b/examples/gzip/batchMixin.py
@@ -4,12 +4,9 @@
from sklearn.datasets import make_classification
-import random
-
-# from gzip_classifier import GzipSVC, GzipKNN, GzipLogisticRegressor
-from sklearn.svm import SVC
+from pathlib import Path
+from time import time
from sklearn.model_selection import train_test_split
-import plotext
logger = logging.getLogger(__name__)
@@ -25,38 +22,96 @@ def __init__(
nb_epoch=1,
**kwargs,
):
- self.batch_size = kwargs.pop("m", batch_size)
+ self.batch_size = kwargs.pop("batch_size", batch_size)
self.max_batches = kwargs.pop("max_batches", max_batches)
+ self.training_log = kwargs.pop("training_log", None)
nb_epoch = kwargs.pop("nb_epoch", nb_epoch)
if not nb_epoch >= 1:
nb_epoch = 1
self.nb_epoch = nb_epoch
- if "m" in kwargs:
- logger.warning(
- f"Parameter 'm' is being overwritten with batch_size={self.batch_size}.",
- )
- kwargs["m"] = self.batch_size
super().__init__(**kwargs)
- self.predict = self.batched_predict(self.predict)
if hasattr(self, "_find_best_samples"):
self._find_best_samples = self.batched_find_best_samples(
self._find_best_samples,
)
- if hasattr(self, "score"):
- self.score = self.batched_score(self.score)
self.fit = self.batched_fit(self.fit)
- self.predict = self.batched_predict(self.predict)
if self.nb_epoch > 1:
self.fit = self.epoch_fit(self.fit)
- # self.score = self.batched_score(self.score)
def epoch_fit(self, fit_func):
def wrapper(*args, **kwargs):
X, y = args
- for i in range(self.nb_epoch):
- random.shuffle(X)
- random.shuffle(y)
+ X_test = kwargs.pop("X_test", None)  # optional held-out inputs, scored after every epoch
+ y_test = kwargs.pop("y_test", None)  # labels for X_test
+ log_file = self.training_log if hasattr(self, "training_log") else None
+ for i in tqdm(range(self.nb_epoch), desc="Epochs", leave=True, position=0):
+ # Apply one shared random permutation to X and y so samples stay paired with their labels
+ indices = np.arange(len(X))
+ np.random.shuffle(indices)
+ X = X[indices]
+ y = y[indices]
+ logger.debug(f"Epoch {i + 1}/{self.nb_epoch}")
fit_func(X, y, **kwargs)
+ if hasattr(self, "score"):
+ score = self.score(X, y)
+ train_scores.append(score)
+ if X_test is not None:
+ assert len(X_test) == len(
+ y_test,
+ ), "X_test and y_test must have the same length"
+ test_score = self.score(X_test, y_test)
+ test_scores.append(test_score)
+ logger.info(f"Train score: {score}, Test score: {test_score}")
+ else:
+ logger.info(f"Train score: {score}")
+ if log_file is not None:
+ if Path(log_file).exists():
+ if i == 0:
+ # rotate the log file by appending a timestamp before the extension
+ rotated_log_name = log_file.replace(
+ ".csv",
+ f"_{int(time())}.csv",
+ )
+ # rename the log file
+ Path(log_file).rename(rotated_log_name)
+ with open(log_file, "w") as f:
+ f.write("epoch, train_score,")
+ if "test_score" in locals():
+ f.write(" test_score,")  # same header cell as the fresh-file branch, so header and value columns line up
+ f.write("\n")
+ f.write(f"{i+1},")
+ f.write(f"{score},")
+ if "test_score" in locals():
+ f.write(f"{test_score},")  # no leading space, matching the rows written by the other branches
+ f.write("\n")
+ else:
+ with open(log_file, "a") as f:
+ # assuming csv format
+ f.write(f"{i+1},")
+ f.write(f"{score},")
+ if "test_score" in locals():
+ f.write(f"{test_score},")
+ f.write("\n")
+ else:
+ with open(log_file, "w") as f:
+ f.write("epoch, train_score,")
+ if "test_score" in locals():
+ f.write(" test_score,")
+ f.write("\n")
+ f.write(f"{i+1},")
+ f.write(f"{score},")
+ if "test_score" in locals():
+ f.write(f"{test_score},")
+ f.write("\n")
+ import plotext as plt
+
+ plt.plot(train_scores, label="Train score")
+ if X_test is not None:
+ plt.plot(test_scores, label="Test score")
+ plt.xlabel("Epochs")
+ plt.ylabel("Accuracy")
+ plt.title("Scores")
+ plt.show()
return wrapper
@@ -72,28 +127,16 @@ def wrapper(*args, **kwargs):
n_batches = self.max_batches
for i in tqdm(
range(n_batches),
- desc="Fitting batches",
total=n_batches,
+ desc="Fitting batches",
leave=False,
- dynamic_ncols=True,
+ position=1,
):
start = i * self.batch_size
end = (i + 1) * self.batch_size
X_batch = X_train[start:end]
y_batch = y_train[start:end]
- print(
- f"Shape of X_batch is {X_batch.shape} and shape of y_batch is {y_batch.shape}",
- )
fit_func(X_batch, y_batch, **kwargs)
- if self.nb_epoch > 1:
- continue
- train_score = self.score(X_batch, y_batch)
- test_score = self.score(X_train, y_train)
- print(
- f"Batch {i+1} of {n_batches} - Train score: {np.mean(train_score)}; Test score: {np.mean(test_score)}",
- )
- train_scores.append(train_score)
- test_scores.append(test_score)
return wrapper
@@ -120,8 +163,6 @@ def wrapper(method, **kwargs):
new_X = X[i * self.batch_size : (i + 1) * self.batch_size] # noqa
new_y = y[i * self.batch_size : (i + 1) * self.batch_size] # noqa
indices = func(X=new_X, y=new_y, method=method, n_jobs=n_jobs)
- # print("After finding best samples")
- # print(f"Length of indices is {len(indices)}")
X = X[indices]
y = y[indices]
self.X_ = X
@@ -133,75 +174,6 @@ def wrapper(method, **kwargs):
return wrapper
- def batched_predict(self, predict_func):
- def wrapper(*args, **kwargs):
- X_test = args[0]
- n = len(X_test)
- n_batches = n // self.batch_size
- if n_batches > self.max_batches:
- n_batches = self.max_batches
- elif n_batches == 0:
- n_batches = 1
- preds = []
- for i in tqdm(
- range(n_batches),
- desc="Predicting batches",
- total=n_batches,
- leave=False,
- dynamic_ncols=True,
- ):
- start = i * self.batch_size
- end = (i + 1) * self.batch_size
- X_batch = X_test[start:end]
- new_preds = predict_func(X_batch, **kwargs)
- preds.append(new_preds)
- return np.concatenate(preds)
-
- return wrapper
-
- def batched_score(self, score_func):
- def wrapper(*args, **kwargs):
- X_test, y_test = args
- n = len(X_test)
- n_batches = n // self.batch_size
- if n_batches > self.max_batches:
- n_batches = self.max_batches
- elif n_batches == 0:
- n_batches = 1
- scores = []
- for i in tqdm(
- range(n_batches),
- desc="Scoring batches",
- total=n_batches,
- leave=False,
- dynamic_ncols=True,
- ):
- start = i * self.batch_size
- end = (i + 1) * self.batch_size
- X_batch = X_test[start:end]
- y_batch = y_test[start:end]
- score = score_func(X_batch, y_batch, **kwargs)
- scores.append(score)
- return scores
-
- return wrapper
-
-
-def create_batched_class(cls, *args, **kwargs):
- name = cls.__name__
-
- class BatchedClass(cls, BatchedMixin):
- def __init__(self, *args, **kwargs):
- self.max_batches = kwargs.pop("max_batches", 100)
- self.batch_size = kwargs.pop("batch_size", 10)
- super().__init__(*args, **kwargs)
-
- batched_class = BatchedClass()
- combined_name = f"Batched{name}"
- batched_class.__name__ = combined_name
- batched_class.__init__(*args, **kwargs)
- return batched_class
-
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@@ -236,20 +208,3 @@ def __init__(self, *args, **kwargs):
test_size=0.2,
random_state=42,
)
-
- class BatchedSVC(BatchedMixin, SVC):
- pass
-
- clf = BatchedSVC(max_batches=100, batch_size=100, kernel="rbf")
- clf.fit(X_train, y_train)
- score = clf.score(X_test, y_test)
- print(score)
- input("Press enter to continue")
- score = round(np.mean(score), 2)
- std = round(np.std(score), 3)
- logger.info(f"Final Score: {score}")
- logger.info(f"Standard Deviation: {std}")
- # if plotext_available is True:
- plotext.scatter(train_scores, label="Train scores")
- plotext.scatter(test_scores, label="Test scores")
- plotext.plot()
diff --git a/examples/gzip/conf/clean.yaml b/examples/gzip/conf/clean.yaml
index c5bc3dd5..0d329632 100644
--- a/examples/gzip/conf/clean.yaml
+++ b/examples/gzip/conf/clean.yaml
@@ -1,14 +1,3 @@
-# params:
- # control:
- # data.sample.train_size: 100
- # defaults:
- # model.init.m : -1
-# fillna:
-# model.init.compressor : "None"
-# model.init.metric : "ncd"
-# model.init.method : "random"
-# model.init.m : ${data.sample.random_state}
-# model.init.precompute : "False"
replace:
model.init.metric:
jaro: "Jaro"
@@ -18,11 +7,11 @@ replace:
ratio: "Ratio"
seqRatio: "SeqRatio"
hamming: "Hamming"
- gzip: "Gzip"
+ gzip: "GZIP"
pkl: "Pickle"
bz2: "BZ2"
- zstd: "Zstd"
- lzma : "Lzma"
+ zstd: "ZSTD"
+ lzma : "LZMA"
model_name:
GzipSVC : "k-SVC"
GzipLogisticRegressor : "k-Logistic"
@@ -30,6 +19,29 @@ replace:
model.init.symmetric:
True: "Symmetric"
False: "Asymmetric"
+ model.init.sampling_method:
+ random : "Random"
+ medoid : "Medoid"
+ sum : "Sum"
+ svc : "SVC"
+ hardness : "Hardness"
+ nearmiss : "NearMiss"
+ knn : "KNN"
+ dataset:
+ ddos : "DDoS"
+ sms_spam : "SMS Spam"
+ kdd_nsl : "KDD NSL"
+ truthseeker : "Truthseeker"
+ model.init.m :
+ -1 : 1
drop_values:
accuracy : 0.00000000000
predict_time : 1.00000000000
+replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+
diff --git a/examples/gzip/conf/condense_knn.yaml b/examples/gzip/conf/condense_knn.yaml
index 52bd92be..82b73c54 100644
--- a/examples/gzip/conf/condense_knn.yaml
+++ b/examples/gzip/conf/condense_knn.yaml
@@ -44,7 +44,7 @@ hydra:
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
+ study_name: ${dataset}_${model_name}_condense
n_trials: 2
n_jobs: 2
max_failure_rate: 1.0
@@ -52,8 +52,7 @@ hydra:
model.init.k : 1,3,5,7,11
+model.init.weights : uniform,distance
+model.init.algorithm : brute
- model.init.symmetric : True,False
- ++model.init.precompute : True
+ model.init.symmetric : True
model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name : ${model_name}
data.sample.random_state: 0,1,2,3,4,5,6,7,8,9
diff --git a/examples/gzip/conf/condense_logistic.yaml b/examples/gzip/conf/condense_logistic.yaml
index 5a585b06..9bb99fbd 100644
--- a/examples/gzip/conf/condense_logistic.yaml
+++ b/examples/gzip/conf/condense_logistic.yaml
@@ -42,7 +42,7 @@ hydra:
n_ei_candidates: 24
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
+ study_name: ${dataset}_${model_name}_condense
storage: sqlite:///optuna.db
n_jobs: 1
n_trials : 1
@@ -53,8 +53,7 @@ hydra:
+model.init.C : 1e-2,1e-1,1e0,1e1,1e2
+model.init.fit_intercept : True,False
+model.init.class_weight : balanced,None
- model.init.symmetric : True,False
- ++model.init.precompute : True
+ model.init.symmetric : True
model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name : ${model_name}
data.sample.random_state: 0,1,2,3,4,5,6,7,8,9
diff --git a/examples/gzip/conf/condense_svc.yaml b/examples/gzip/conf/condense_svc.yaml
index 478c9c97..6f1d3adf 100644
--- a/examples/gzip/conf/condense_svc.yaml
+++ b/examples/gzip/conf/condense_svc.yaml
@@ -44,7 +44,7 @@ hydra:
n_ei_candidates: 24
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ???
+ study_name: ${dataset}_${model_name}_condense
storage: sqlite:///optuna.db
n_jobs: 2
n_trials : 2
@@ -53,8 +53,8 @@ hydra:
+model.init.C : 1e-2,1e-1,1e0,1e1,1e2
+model.init.gamma : scale,auto
+model.init.class_weight : balanced,null
- ++model.init.precompute : True
model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ model.init.symmetric : True
model_name : ${model_name}
data.sample.random_state: 0,1,2,3,4,5,6,7,8,9
model.init.m: tag(log, interval(.1, 1))
diff --git a/examples/gzip/conf/condensed_plots.yaml b/examples/gzip/conf/condensed_plots.yaml
index 268802a3..de1d9e92 100644
--- a/examples/gzip/conf/condensed_plots.yaml
+++ b/examples/gzip/conf/condensed_plots.yaml
@@ -1,61 +1,88 @@
-line_plot:
- - file : sampling_method_vs_accuracy.pdf
- hue: model.init.sampling_method
- title: #"Accuracy vs Sampling Method"
- x : model.init.m
- xlabel: Percentage of Samples per Class
+cat_plot:
+ - file : condensing_method_vs_accuracy.pdf
+ digitize : Condensing Ratio
+ x: Condensing Method
+ hue : Condensing Ratio
y : accuracy
- ylabel: Accuracy
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim : [0, 1]
y_scale : linear
- legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- - file: sampling_method_vs_train_time.pdf
- hue: model.init.sampling_method
- title: #"Training Time vs Sampling Method"
- x : model.init.m
- xlabel: Percentage of Samples per Class
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ kind : boxen
+ col : Model
+ rotation : 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: "Condensing Method"
+ ylabels: "Accuracy"
+ legend_title : "Sample Ratio"
+ - file: condensing_method_vs_train_time.pdf
+ x: Condensing Method
+ hue : Condensing Ratio
+ digitize : Condensing Ratio
y : train_time
- ylabel: Training Time (s)
- y_scale : linear
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim : [0, 1]
- legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- - file : sampling_method_vs_predict_time.pdf
- hue: model.init.sampling_method
- title: #"Prediction Time vs Sampling Method"
- x : model.init.m
- xlabel: Percentage of Samples per Class
+ y_scale : log
+ kind : boxen
+ col : Model
+ rotation : 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: "Condensing Method"
+ ylabels: "Training Time"
+ legend_title : "Sample Ratio"
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ - file : condensing_method_vs_predict_time.pdf
+ x: Condensing Method
+ hue : Condensing Ratio
+ digitize : Condensing Ratio
y : predict_time
- ylabel: Prediction Time (s)
y_scale : log
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim : [0, 1]
- legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ col : Model
+ rotation : 45
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ kind : boxen
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: "Condensing Method"
+ ylabels: "Prediction Time"
+ legend_title : "Sample Ratio"
diff --git a/examples/gzip/conf/gzip_knn.yaml b/examples/gzip/conf/gzip_knn.yaml
index da8b7ca5..fc9f0b73 100644
--- a/examples/gzip/conf/gzip_knn.yaml
+++ b/examples/gzip/conf/gzip_knn.yaml
@@ -33,30 +33,26 @@ hydra:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k : 1,3,5,7,11
+model.init.weights : uniform,distance
+model.init.algorithm : brute
- model.init.symmetric : True,False
- ++model.init.precompute : True
- model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name : ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
diff --git a/examples/gzip/conf/gzip_logistic.yaml b/examples/gzip/conf/gzip_logistic.yaml
index 3636c201..e7d9f4d0 100644
--- a/examples/gzip/conf/gzip_logistic.yaml
+++ b/examples/gzip/conf/gzip_logistic.yaml
@@ -33,31 +33,28 @@ hydra:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials : 1
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.solver: saga
- +model.init.penalty : l2,l1,l2,none
- +model.init.tol : 1e-4,1e-3,1e-2
- +model.init.C : 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.penalty : l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C : tag(log, interval(1e-3, 1e3))
+model.init.fit_intercept : True,False
+model.init.class_weight : balanced,None
- model.init.symmetric : True,False
- ++model.init.precompute : True
- model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+
model_name : ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
diff --git a/examples/gzip/conf/gzip_svc.yaml b/examples/gzip/conf/gzip_svc.yaml
index 42212998..4c20c962 100644
--- a/examples/gzip/conf/gzip_svc.yaml
+++ b/examples/gzip/conf/gzip_svc.yaml
@@ -35,29 +35,25 @@ hydra:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials : 2
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.kernel : rbf,precomputed
- +model.init.C : 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.C : tag(log, interval(1e-3, 1e3))
+model.init.gamma : scale,auto
+model.init.class_weight : balanced,null
- model.init.symmetric : True,False
- ++model.init.precompute : True
- model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name : ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
diff --git a/examples/gzip/conf/merged_plots.yaml b/examples/gzip/conf/merged_plots.yaml
new file mode 100644
index 00000000..5226c4bd
--- /dev/null
+++ b/examples/gzip/conf/merged_plots.yaml
@@ -0,0 +1,372 @@
+cat_plot:
+ - file: models_vs_accuracy.pdf
+ x : Model
+ y : accuracy
+ hue : data.sample.train_size
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : Accuracy
+ legend_title: "Samples"
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ col : Dataset
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: models_vs_train_time.pdf
+ x : Model
+ y : train_time
+ hue : data.sample.train_size
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_t$ (s)
+ legend_title: "Samples"
+ rotation: 90
+ col : Dataset
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ y_scale : log
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: models_vs_predict_time.pdf
+ x : Model
+ y : predict_time_per_sample
+ hue : data.sample.train_size
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_i$ (s)
+ legend_title: "Samples"
+ col : Dataset
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ y_scale : log
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: symmetric_models_vs_accuracy.pdf
+ row : Model
+ x : data.sample.train_size
+ y : accuracy
+ hue : Symmetric
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : "Samples"
+ ylabels : Accuracy
+ legend_title: " "
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ col : Dataset
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: symmetric_models_vs_train_time.pdf
+ row : Model
+ x : data.sample.train_size
+ y : train_time_per_sample
+ hue : Symmetric
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_t$ (s)
+ legend_title: " "
+ rotation: 90
+ col : Dataset
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ y_scale : log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: symmetric_models_vs_predict_time.pdf
+ x : data.sample.train_size
+ row : Model
+ y : predict_time_per_sample
+ hue : Symmetric
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_i$ (s)
+ legend_title: " "
+ col : Dataset
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ y_scale : log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: condensing_methods_vs_accuracy.pdf
+ x : Model
+ y : accuracy
+ hue : Condensing Method
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : Accuracy
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ col : Dataset
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ legend_title: "Condensing Method"
+ - file: condensing_methods_vs_train_time.pdf
+ x : Model
+ y : train_time
+ hue : Condensing Method
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_t$ (s)
+ legend_title: "Condensing Method"
+ rotation: 90
+ col : Dataset
+ y_scale : log
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: condensing_methods_vs_predict_time.pdf
+ x : Model
+ y : predict_time_per_sample
+ hue : Condensing Method
+ errorbar: se
+ kind : boxen
+ titles :
+ xlabels : " "
+ ylabels : $t_i$ (s)
+ legend_title: "Condensing Method"
+ col : Dataset
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ rotation: 90
+ y_scale : log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title: #"Accuracy vs $m$-best samples across datasets and compressors"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ - file: string_metric_vs_accuracy.pdf
+ hue : Metric
+ title: #"Accuracy vs $m$-best samples across datasets and string metrics"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ - file: string_metric_vs_train_time.pdf
+ hue : Metric
+ title: #"Training Time vs $m$-best samples across datasets and string metrics"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: $t_t$ (s)
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ y_scale: log
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title: #"Training Time vs $m$-best samples across datasets and compressors"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: $t_t$ (s)
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ y_scale: log
+ - file: string_metric_vs_predict_time.pdf
+ hue : Metric
+ title: #"Prediction Time vs $m$-best samples across datasets and string metrics"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time_per_sample
+ ylabel: $t_i$ (s)
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ y_scale: log
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title: #"Prediction Time vs $m$-best samples across datasets and compressors"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time_per_sample
+ ylabel: $t_i$ (s)
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ style: Dataset
+ style_order:
+ - "DDoS"
+ - "SMS Spam"
+ - "KDD NSL"
+ - "Truthseeker"
+ legend :
+ bbox_to_anchor : [1.05, .5]
+ loc: center left
+ prop: {"size" : 12}
+ y_scale: log
diff --git a/examples/gzip/conf/plots.yaml b/examples/gzip/conf/plots.yaml
index eac757c4..188f8e2f 100644
--- a/examples/gzip/conf/plots.yaml
+++ b/examples/gzip/conf/plots.yaml
@@ -1,17 +1,57 @@
line_plot:
+- file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title: #"Accuracy vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- file: metric_vs_accuracy.pdf
- hue: model.init.metric
+ hue: Metric
title: #"Accuracy vs $m$-best samples"
x: data.sample.train_size
xlabel: Number of Training Samples
y: accuracy
ylabel: Accuracy
hue_order:
- - Gzip
+ - GZIP
- Pickle
- BZ2
- - Zstd
- - Lzma
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+- file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title: #"Accuracy vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ # - GZIP
+ # - Pickle
+ # - BZ2
+ # - ZSTD
+ # - LZMA
- Levenshtein
- Ratio
- Hamming
@@ -23,7 +63,31 @@ line_plot:
xlim: [10, 500]
legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- file: metric_vs_train_time.pdf
- hue: model.init.metric
+ hue: Metric
+ title: #"Training Time vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+- file: compressor_metric_vs_train_time.pdf
+ hue: Metric
title: #"Training Time vs $m$-best samples"
x: data.sample.train_size
xlabel: Number of Training Samples
@@ -31,11 +95,29 @@ line_plot:
ylabel: Training Time (s)
y_scale: linear
hue_order:
- - Gzip
+ - GZIP
- Pickle
- BZ2
- - Zstd
- - Lzma
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim: [10, 500]
+ legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+- file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title: #"Training Time vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ # - GZIP
+ # - Pickle
+ # - BZ2
+ # - ZSTD
+ # - LZMA
- Levenshtein
- Ratio
- Hamming
@@ -46,8 +128,22 @@ line_plot:
err_style: bars
xlim: [10, 500]
legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+- file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title: #"Prediction Time vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
- file: metric_vs_predict_time.pdf
- hue: model.init.metric
+ hue: Metric
title: #"Prediction Time vs $m$-best samples"
x: data.sample.train_size
xlabel: Number of Training Samples
@@ -55,11 +151,26 @@ line_plot:
ylabel: Prediction Time (s)
y_scale: linear
hue_order:
- - Gzip
+ - GZIP
- Pickle
- BZ2
- - Zstd
- - Lzma
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+- file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title: #"Prediction Time vs $m$-best samples"
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
- Levenshtein
- Ratio
- Hamming
@@ -71,99 +182,166 @@ line_plot:
xlim: [10, 500]
legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
cat_plot:
- - file: symmetric_vs_metric.pdf
- x : model.init.symmetric
+ - file: symmetric_vs_compressor_metric.pdf
+ x : Metric
y : accuracy
- hue : model.init.metric
+ hue : Symmetric
errorbar: se
- kind : bar
- titles :
- xlabels : ""
+ kind : boxen
+ titles : " "
+ xlabels : "Compressor"
ylabels : Accuracy
legend_title: "Metrics"
- hue_order:
- - Gzip
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ # - Levenshtein
+ # - Ratio
+ # - Hamming
+ # - Jaro
+ # - Jaro-Winkler
+ # - SeqRatio
+ rotation: 90
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ - file: symmetric_vs_string_metric.pdf
+ x : Metric
+ y : accuracy
+ hue : Symmetric
+ errorbar: se
+ kind : boxen
+ titles : " "
+ xlabels : "String Metrics"
+ ylabels : Accuracy
+ legend_title: " "
+ order:
+ # - GZIP
+ # - Pickle
+ # - BZ2
+ # - ZSTD
+ # - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
+ - file: symmetric_vs_metric.pdf
+ x : Metric
+ y : accuracy
+ hue : Symmetric
+ errorbar: se
+ kind : boxen
+ titles : " "
+ xlabels : "Metrics"
+ ylabels : Accuracy
+ legend_title: " "
+ order:
+ - GZIP
- Pickle
- BZ2
- - Zstd
- - Lzma
+ - ZSTD
+ - LZMA
- Levenshtein
- Ratio
- Hamming
- Jaro
- Jaro-Winkler
- SeqRatio
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ rotation: 90
legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- set:
- yscale: linear
- ylim: [0, 1]
- file: symmetric_vs_metric_train_time.pdf
- x : model.init.symmetric
+ x : Metric
y : train_time
- hue : model.init.metric
+ hue : Symmetric
errorbar: se
- kind : bar
+ kind : boxen
titles :
- xlabels : ""
+ xlabels : "Metrics"
ylabels : Training Time (s)
legend_title: "Metrics"
- hue_order:
- - Gzip
+ order:
+ - GZIP
- Pickle
- BZ2
- - Zstd
- - Lzma
+ - ZSTD
+ - LZMA
- Levenshtein
- Ratio
- Hamming
- Jaro
- Jaro-Winkler
- SeqRatio
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ rotation : 90
legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x : model_name
- y : accuracy
- hue : data.sample.train_size
+ y_scale : linear
+ - file: symmetric_vs_string_metric_train_time.pdf
+ x : Metric
+ y : train_time
+ hue : Symmetric
errorbar: se
kind : boxen
- titles :
- xlabels : Model
- ylabels : Accuracy
- legend_title: "Samples"
-
- legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- set:
- yscale: linear
- ylim: [0, 1]
- rotation: 90
- - file: models_vs_train_time.pdf
- x : model_name
- y : accuracy
- hue : data.sample.train_size
- errorbar: se
- kind : bar
- titles :
- xlabels : Model
+ titles :
+ xlabels : "String Metrics"
ylabels : Training Time (s)
- legend_title: "Samples"
- rotation: 90
+ legend_title: "String Metrics"
+ order:
+ # - GZIP
+ # - Pickle
+ # - BZ2
+ # - ZSTD
+ # - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ rotation : 90
legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x : model_name
- y : accuracy
- hue : data.sample.train_size
+ - file: symmetric_vs_compressor_metric_train_time.pdf
+ x : Metric
+ y : train_time
+ hue : Symmetric
errorbar: se
- kind : bar
- titles :
- xlabels : Model
- ylabels : Prediction Time (s)
- legend_title: "Samples"
-
+ kind : boxen
+ titles :
+ xlabels : "Compressors"
+ ylabels : Training Time (s)
+ legend_title: "Metrics"
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ # - Levenshtein
+ # - Ratio
+ # - Hamming
+ # - Jaro
+ # - Jaro-Winkler
+ # - SeqRatio
+ hue_order :
+ - Asymmetric
+ - Symmetric
+ rotation : 90
legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}}
- set:
- yscale: log
- rotation: 90
diff --git a/examples/gzip/dvc.lock b/examples/gzip/dvc.lock
index a02a4b1d..afeed250 100644
--- a/examples/gzip/dvc.lock
+++ b/examples/gzip/dvc.lock
@@ -1,15521 +1,5601 @@
schema: '2.0'
stages:
- train:
- cmd: python -m deckard.layers.experiment train
+ clean@sms_spam-gzip_knn:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_knn.csv -o
+ sms_spam/plots/clean/gzip_knn.csv -c conf/clean.yaml
deps:
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- - path: raw_data/
+ - path: sms_spam/reports/gzip_knn.csv
hash: md5
- md5: 33d46673e0631bef98be9e8991ed1ed1.dir
- size: 50328647
- nfiles: 8
+ md5: 2cc3444a2175ce059be641e3c97a3958
+ size: 1219660
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/reports/train/default/predictions.json
- hash: md5
- md5: 986d2f0abe9b96253b196a222a550609
- size: 702
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: sms_spam/plots/clean/gzip_knn.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- test_each_method@knn-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/knn
- ++raise_exception=True '
+ md5: 788afe513b0596808b5125d82019c3ae
+ size: 704722
+ clean@sms_spam-gzip_svc:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_svc.csv -o
+ sms_spam/plots/clean/gzip_svc.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ - path: sms_spam/reports/gzip_svc.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: c4196fa3f0dbc4a27972b967e7104485
+ size: 1327853
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/method/knn
+ - path: sms_spam/plots/clean/gzip_svc.csv
hash: md5
- md5: f902bdd8882aa06bba0d1fef19c4a313.dir
- size: 11613
- nfiles: 4
- - path: kdd_nsl/reports/train/knn/score_dict.json
- hash: md5
- md5: 4e7f0750779df5202e5dec6228f94f99
- size: 490
- test_each_method@knn-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- hydra.run.dir=truthseeker/logs/method/knn ++raise_exception=True '
+ md5: 75d1640476b0bfb25b015190f8b4d3ed
+ size: 1077730
+ clean@sms_spam-gzip_logistic:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_logistic.csv
+ -o sms_spam/plots/clean/gzip_logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ - path: sms_spam/reports/gzip_logistic.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: 0b87e1a278e97393093edfa85a6c3647
+ size: 1324676
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: truthseeker/logs/method/knn
+ - path: sms_spam/plots/clean/gzip_logistic.csv
hash: md5
- md5: 5a52da2681ff444c53a1623722c2d431.dir
- size: 11642
- nfiles: 4
- - path: truthseeker/reports/train/knn/score_dict.json
- hash: md5
- md5: f09f746efa5c7a56f4dd1a3e20a7ab6b
- size: 485
- test_each_method@svc-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/svc
- ++raise_exception=True '
+ md5: 66fb493c5dac4d615c1047e8c4432846
+ size: 954789
+ clean@sms_spam-condense/knn:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/knn.csv
+ -o sms_spam/plots/clean/condense/knn.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ - path: sms_spam/reports/condense/knn.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: 905472e105c51a514aa316767bce543e
+ size: 1313303
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/method/svc
+ - path: sms_spam/plots/clean/condense/knn.csv
hash: md5
- md5: 433b30d37ba64e71527ac2d837b44fa2.dir
- size: 11612
- nfiles: 4
- - path: kdd_nsl/reports/train/svc/score_dict.json
- hash: md5
- md5: f41538adb6ffa9182ea126c85c353abf
- size: 489
- test_each_method@svc-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- hydra.run.dir=truthseeker/logs/method/svc ++raise_exception=True '
+ md5: ca86373d57bc8ef7b33d53d4113d5b17
+ size: 859047
+ clean@sms_spam-condense/svc:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/svc.csv
+ -o sms_spam/plots/clean/condense/svc.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ - path: sms_spam/reports/condense/svc.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: 63204fb6e188d4166e415c86e305631d
+ size: 1399188
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: truthseeker/logs/method/svc
- hash: md5
- md5: bc37655235ef0d2919a62c85456d379c.dir
- size: 11645
- nfiles: 4
- - path: truthseeker/reports/train/svc/score_dict.json
+ - path: sms_spam/plots/clean/condense/svc.csv
hash: md5
- md5: 97f1fed3ee2887773ca9a50eeeb5b1ed
- size: 488
- test_each_method@medoid-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/medoid
- ++raise_exception=True '
+ md5: c91f0d6cc570e6ea8fe093ba67ea5da8
+ size: 1142139
+ clean@sms_spam-condense/logistic:
+ cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/logistic.csv
+ -o sms_spam/plots/clean/condense/logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ - path: sms_spam/reports/condense/logistic.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: 5d331b32fbe15e0cdc7611fc3aa946a2
+ size: 3983718
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/method/medoid
- hash: md5
- md5: 5b972c1f6a8c4ebff94a088e2be12b28.dir
- size: 11661
- nfiles: 4
- - path: kdd_nsl/reports/train/medoid/score_dict.json
+ - path: sms_spam/plots/clean/condense/logistic.csv
hash: md5
- md5: 10a0913632dea0d6717263ba1854b1e2
- size: 484
- test_each_method@medoid-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=medoid
- hydra.run.dir=truthseeker/logs/method/medoid ++raise_exception=True '
+ md5: 6d5bc96d209d77fefaf76e73109b26ac
+ size: 2257621
+ merge@sms_spam:
+ cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/gzip_knn.csv
+ --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder
+ sms_spam/plots --output_file merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: sms_spam/plots/clean/gzip_knn.csv
hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/medoid
- hash: md5
- md5: 7b6fef8487e5b8dec0f76f4b4fc59ccb.dir
- size: 10226
- nfiles: 4
- - path: truthseeker/reports/train/medoid/score_dict.json
- hash: md5
- md5: 8cebb3ee0098d2ee2bb4130e346e8e0f
- size: 282
- test_each_method@sum-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/sum
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ md5: 788afe513b0596808b5125d82019c3ae
+ size: 704722
+ - path: sms_spam/plots/clean/gzip_logistic.csv
hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ md5: 66fb493c5dac4d615c1047e8c4432846
+ size: 954789
+ - path: sms_spam/plots/clean/gzip_svc.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ md5: 75d1640476b0bfb25b015190f8b4d3ed
+ size: 1077730
outs:
- - path: kdd_nsl/logs/method/sum
- hash: md5
- md5: 41cd7632a1d85e7380d14b0e8eccc819.dir
- size: 11607
- nfiles: 4
- - path: kdd_nsl/reports/train/sum/score_dict.json
+ - path: sms_spam/plots/merged.csv
hash: md5
- md5: 2a97e468ea2e9071e1f7d5bdb1e7495b
- size: 484
- test_each_method@sum-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=sum
- hydra.run.dir=truthseeker/logs/method/sum ++raise_exception=True '
+ md5: 4baf51fdcc220aedc6443147a057559e
+ size: 2765074
+ merge_condense@sms_spam:
+ cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/condense/knn.csv
+ --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
+ sms_spam/plots/ --output_file condensed_merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: sms_spam/plots/clean/condense/knn.csv
hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
+ md5: ca86373d57bc8ef7b33d53d4113d5b17
+ size: 859047
+ - path: sms_spam/plots/clean/condense/logistic.csv
hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/sum
+ md5: 6d5bc96d209d77fefaf76e73109b26ac
+ size: 2257621
+ - path: sms_spam/plots/clean/condense/svc.csv
hash: md5
- md5: e7f9741f777d98f3d3416264b9f3e6b2.dir
- size: 10164
- nfiles: 4
- - path: truthseeker/reports/train/sum/score_dict.json
+ md5: c91f0d6cc570e6ea8fe093ba67ea5da8
+ size: 1142139
+ outs:
+ - path: sms_spam/plots/condensed_merged.csv
hash: md5
- md5: d49a3cbdeb348bbf9ad3b59e9e8e0e32
- size: 283
- test_each_method@random-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/random
- ++raise_exception=True '
+ md5: aff0ab5439e406220d4c0c95d7032f71
+ size: 4293513
+ plot@sms_spam:
+ cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/merged.csv -c
+ conf/plots.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: conf/plots.yaml
hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
+ md5: 43e3ec0876b55c83f231615f7a904e33
+ size: 7386
+ - path: sms_spam/plots/merged.csv
hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
+ md5: 4baf51fdcc220aedc6443147a057559e
+ size: 2765074
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/method/random
- hash: md5
- md5: 723e8c93428a09edb21943a20fca5c3c.dir
- size: 11639
- nfiles: 4
- - path: kdd_nsl/reports/train/random/score_dict.json
- hash: md5
- md5: ed402e68904e8888b8ba6b0bebf6fa05
- size: 488
- test_each_method@random-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- hydra.run.dir=truthseeker/logs/method/random ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: f8a4019adc566855c2a704a0311ff7c4
- size: 489
- - path: params.yaml
- hash: md5
- md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2
- size: 1467
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/random
- hash: md5
- md5: f785fe50b4007a169c37e6e9cb856268.dir
- size: 11670
- nfiles: 4
- - path: truthseeker/reports/train/random/score_dict.json
- hash: md5
- md5: 8bfb4b2efa55e9944cec7331401762f9
- size: 485
- prepare_distance_matrices@0-10-kdd_nsl:
- cmd: python -m deckard.layers.optimise files.name=0-10 stage=train data=kdd_nsl
- dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=10 dataset=kdd_nsl
- files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-10.npz
- model.init.method=random model.init.m=100 ++raise_exception=True
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 3332d80113acf55f8e69e46aea82a1cc
- size: 412
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name:
- https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name:
- https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix: kdd_nsl/model/gzip_classifier/gzip/0-100.npz
- k: 1
- m: -1
- method:
- name: gzip_classifier.GzipClassifier
- library: sklearn
- model_name: gzip_classifier
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/model/gzip_classifier/gzip/0-10.npz
- hash: md5
- md5: 1b745ff8dbc88f247f3245d9efd6de7e
- size: 208
- - path: kdd_nsl/reports/train/0-10/score_dict.json
- hash: md5
- md5: cae521db2dcda14d0d3ed880c26adf62
- size: 233
- prepare_distance_matrices@0-100-kdd_nsl:
- cmd: python -m deckard.layers.optimise files.name=0-100 stage=train data=kdd_nsl
- dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=100 dataset=kdd_nsl
- files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-100.npz
- model.init.method=random model.init.m=100 ++raise_exception=True
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 222b4b55b1b16639ce30218bf60c1f32
- size: 412
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name:
- https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- data:
- cmd: python data_prep.py
- deps:
- - path: data_prep.py
- hash: md5
- md5: 18244c921ed2d7cbf25b8362b3ca33aa
- size: 5146
- outs:
- - path: raw_data/
- hash: md5
- md5: 33d46673e0631bef98be9e8991ed1ed1.dir
- size: 50328647
- nfiles: 8
- test_symmetric_methods@true-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_true files.directory=kdd_nsl data=kdd_nsl
- dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=true hydra.run.dir=kdd_nsl/logs/symmetric/true
- model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/model/gzip_knn/None/symmetric_true.npz
- hash: md5
- md5: 1b745ff8dbc88f247f3245d9efd6de7e
- size: 208
- - path: kdd_nsl/reports/train/symmetric_true/score_dict.json
- hash: md5
- md5: bb10a010ac3f8790cdbe4310288efc63
- size: 432
- test_symmetric_methods@true-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_true files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=gzip_knn model.init.symmetric=true hydra.run.dir=truthseeker/logs/symmetric/true
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_true.npz
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/model/gzip_knn/None/symmetric_true.npz
- hash: md5
- md5: f71a2727e708fdfb7867a6983f3aa8cf
- size: 223
- - path: truthseeker/reports/train/symmetric_true/score_dict.json
- hash: md5
- md5: 6d7a4eb01733e4e2fda1c40b5562646c
- size: 434
- test_symmetric_methods@true-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_true files.directory=sms_spam data=sms_spam
- dataset=sms_spam model_name=gzip_knn model.init.symmetric=true hydra.run.dir=sms_spam/logs/symmetric/true
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/model/gzip_knn/None/symmetric_true.npz
- hash: md5
- md5: 1b745ff8dbc88f247f3245d9efd6de7e
- size: 208
- - path: sms_spam/reports/train/symmetric_true/score_dict.json
- hash: md5
- md5: 0b8d690ffca7173942d490a2f0cbeec4
- size: 432
- test_symmetric_methods@true-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_true files.directory=ddos data=ddos dataset=ddos
- model_name=gzip_knn model.init.symmetric=true hydra.run.dir=ddos/logs/symmetric/true
- model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/model/gzip_knn/None/symmetric_true.npz
- hash: md5
- md5: 1b745ff8dbc88f247f3245d9efd6de7e
- size: 208
- - path: ddos/reports/train/symmetric_true/score_dict.json
- hash: md5
- md5: 2c12176f8bf7355f284e059b2527cf44
- size: 418
- test_symmetric_methods@false-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_false files.directory=kdd_nsl data=kdd_nsl
- dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=false hydra.run.dir=kdd_nsl/logs/symmetric/false
- model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/model/gzip_knn/None/symmetric_false.npz
- hash: md5
- md5: 9a9fcf9ba5dbc34eb2ca1f203088fc47
- size: 740
- - path: kdd_nsl/reports/train/symmetric_false/score_dict.json
- hash: md5
- md5: 8ae56e642565330a37e731472a6c2d76
- size: 429
- test_symmetric_methods@false-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_false files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=gzip_knn model.init.symmetric=false hydra.run.dir=truthseeker/logs/symmetric/false
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_false.npz
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/model/gzip_knn/None/symmetric_false.npz
- hash: md5
- md5: b02cc76ddfb10d1e0e63e0f6e05cdaae
- size: 1791
- - path: truthseeker/reports/train/symmetric_false/score_dict.json
- hash: md5
- md5: 4ef36cb0b198d778dc8e0e6ff282d778
- size: 433
- test_symmetric_methods@false-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_false files.directory=sms_spam data=sms_spam
- dataset=sms_spam model_name=gzip_knn model.init.symmetric=false hydra.run.dir=sms_spam/logs/symmetric/false
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_false.npz
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/model/gzip_knn/None/symmetric_false.npz
- hash: md5
- md5: ac71e5af3607731b783a490caf81c37f
- size: 694
- - path: sms_spam/reports/train/symmetric_false/score_dict.json
- hash: md5
- md5: 66d92f0ed630b08fbddb1a9c07f13981
- size: 432
- test_symmetric_methods@false-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random
- model.init.m=10 files.name=symmetric_false files.directory=ddos data=ddos dataset=ddos
- model_name=gzip_knn model.init.symmetric=false hydra.run.dir=ddos/logs/symmetric/false
- model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- - path: raw_data/
- hash: md5
- md5: d897229dd67895957a0a4330ce95b09a.dir
- size: 42279674
- nfiles: 4
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/model/gzip_knn/None/symmetric_false.npz
- hash: md5
- md5: 0d3f08d9c6cb8ddc6d3e68f8208c9bc5
- size: 821
- - path: ddos/reports/train/symmetric_false/score_dict.json
- hash: md5
- md5: ba81be29d56943d6d573597c93ba8081
- size: 412
- test_each_compressor@gzip-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=kdd_nsl
- data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/gzip.npz
- model.init.compressor=gzip model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/gzip
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/gzip/score_dict.json
- hash: md5
- md5: b3f76b5e7fe68821d9336c4968888b08
- size: 431
- test_each_compressor@gzip-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=truthseeker
- data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10
- hydra.run.dir=truthseeker/logs/compressor/gzip ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/gzip/score_dict.json
- hash: md5
- md5: df9b8a302dfb3b85b5c3c7623d86383e
- size: 434
- test_each_compressor@gzip-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=sms_spam
- data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10
- hydra.run.dir=sms_spam/logs/compressor/gzip ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/gzip/score_dict.json
- hash: md5
- md5: 39a6710366ed557259ef981fc0b45a6a
- size: 432
- test_each_compressor@gzip-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=ddos
- data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/gzip.npz
- model.init.compressor=gzip model.init.m=10 hydra.run.dir=ddos/logs/compressor/gzip
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/gzip/score_dict.json
- hash: md5
- md5: 1919cb29d6196b8dd14c01458e341a6b
- size: 414
- test_each_compressor@zstd-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=kdd_nsl
- data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/zstd.npz
- model.init.compressor=zstd model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/zstd
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/zstd/score_dict.json
- hash: md5
- md5: 868509c201cbb0093818357427896da7
- size: 416
- test_each_compressor@zstd-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=truthseeker
- data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10
- hydra.run.dir=truthseeker/logs/compressor/zstd ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/zstd/score_dict.json
- hash: md5
- md5: 89546ca3a3510fd73671341863c69cb9
- size: 434
- test_each_compressor@zstd-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=sms_spam
- data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10
- hydra.run.dir=sms_spam/logs/compressor/zstd ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/zstd/score_dict.json
- hash: md5
- md5: e5a10b0013b032b22dd6cc596a7810bb
- size: 429
- test_each_compressor@zstd-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=ddos
- data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/zstd.npz
- model.init.compressor=zstd model.init.m=10 hydra.run.dir=ddos/logs/compressor/zstd
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/zstd/score_dict.json
- hash: md5
- md5: 898feb287504053c9de9c1a809733c4b
- size: 432
- test_each_compressor@pkl-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=kdd_nsl
- data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/pkl.npz
- model.init.compressor=pkl model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/pkl
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/pkl/score_dict.json
- hash: md5
- md5: 3e01c227095014ab9f4665ea98e7f3b5
- size: 430
- test_each_compressor@pkl-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=truthseeker
- data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10
- hydra.run.dir=truthseeker/logs/compressor/pkl ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/pkl/score_dict.json
- hash: md5
- md5: 85d4598fcbe6077a465a9edeadd3843a
- size: 430
- test_each_compressor@pkl-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=sms_spam
- data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10
- hydra.run.dir=sms_spam/logs/compressor/pkl ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/pkl/score_dict.json
- hash: md5
- md5: a4667414e7721ee7ed489df1e412e0b0
- size: 431
- test_each_compressor@pkl-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=ddos
- data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/pkl.npz
- model.init.compressor=pkl model.init.m=10 hydra.run.dir=ddos/logs/compressor/pkl
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/pkl/score_dict.json
- hash: md5
- md5: 340261dd836239b846699c4c687b3042
- size: 432
- test_each_compressor@bz2-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=kdd_nsl
- data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/bz2.npz
- model.init.compressor=bz2 model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/bz2
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/bz2/score_dict.json
- hash: md5
- md5: 05fd4b45d252c648d4afb4ba3ffc05e4
- size: 430
- test_each_compressor@bz2-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=truthseeker
- data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=truthseeker/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10
- hydra.run.dir=truthseeker/logs/compressor/bz2 ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/bz2/score_dict.json
- hash: md5
- md5: 1b3094ea4075cb1b5b8cd3f74bf0c3dc
- size: 432
- test_each_compressor@bz2-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=sms_spam
- data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=sms_spam/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10
- hydra.run.dir=sms_spam/logs/compressor/bz2 ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/bz2/score_dict.json
- hash: md5
- md5: 45303b7d052fb91e65c9f3ad97999b6a
- size: 431
- test_each_compressor@bz2-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=ddos
- data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/bz2.npz
- model.init.compressor=bz2 model.init.m=10 hydra.run.dir=ddos/logs/compressor/bz2
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/bz2/score_dict.json
- hash: md5
- md5: fdfa470b2053f561dea2e047423b54cd
- size: 431
- test_each_precompute@True-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 hydra.run.dir=kdd_nsl/logs/precompute/True
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/precompute_True/score_dict.json
- hash: md5
- md5: f5c9a9ce41a0680f1e18874d6f21bd25
- size: 433
- test_each_precompute@True-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/True.npz
- +model.init.precompute=True model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/True
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/precompute_True/score_dict.json
- hash: md5
- md5: 76dcdbf7dc1fb63ce7b978c2f6bef8a2
- size: 435
- test_each_precompute@True-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/True.npz
- +model.init.precompute=True model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/True
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/precompute_True/score_dict.json
- hash: md5
- md5: fe9a23520513840fe4a90fb8413e62da
- size: 432
- test_each_precompute@True-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=ddos/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 hydra.run.dir=ddos/logs/precompute/True
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/precompute_True/score_dict.json
- hash: md5
- md5: 0d72c99dc99df13629a383ca9745712e
- size: 429
- test_each_precompute@False-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/False.npz +model.init.precompute=False model.init.m=10 hydra.run.dir=kdd_nsl/logs/precompute/False
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/precompute_False/score_dict.json
- hash: md5
- md5: d225ea006c02f56f552431e223ef6576
- size: 429
- test_each_precompute@False-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/False.npz
- +model.init.precompute=False model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/False
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/precompute_False/score_dict.json
- hash: md5
- md5: e8094fb43b55432d298346a0a291ac71
- size: 431
- test_each_precompute@False-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/False.npz
- +model.init.precompute=False model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/False
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/precompute_False/score_dict.json
- hash: md5
- md5: 0f3b13aba3cc817f2327769f36b54939
- size: 432
- test_each_precompute@False-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random
- model.init.distance_matrix=ddos/model/gzip_knn/None/False.npz +model.init.precompute=False model.init.m=10 hydra.run.dir=ddos/logs/precompute/False
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/precompute_False/score_dict.json
- hash: md5
- md5: 9cc47f921a908ad81e486980d134f453
- size: 418
- test_each_metric@levenshtein-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/levenshtein.npz
- hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/levenshtein ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/levenshtein/score_dict.json
- hash: md5
- md5: 4f517489b794c13bbbbb477bd7b14ea8
- size: 248
- test_each_metric@levenshtein-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1
- hydra.run.dir=truthseeker/logs/metric/levenshtein ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/levenshtein/score_dict.json
- hash: md5
- md5: 2f0fa43167cde43c2d8c901ee6bc360d
- size: 250
- test_each_metric@levenshtein-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1
- hydra.run.dir=sms_spam/logs/metric/levenshtein ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/levenshtein/score_dict.json
- hash: md5
- md5: bb8456e5a2457e841619d5750922bd0c
- size: 246
- test_each_metric@levenshtein-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/levenshtein.npz
- hydra.sweeper.n_jobs=1 hydra.run.dir=ddos/logs/metric/levenshtein ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/levenshtein/score_dict.json
- hash: md5
- md5: 1956a0651292bf6919a103e46c0c5906
- size: 248
- test_each_metric@ratio-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/ratio.npz
- hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/ratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/ratio/score_dict.json
- hash: md5
- md5: 841058c500666af10a3a84fd7769e53d
- size: 244
- test_each_metric@ratio-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=truthseeker/logs/metric/ratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/ratio/score_dict.json
- hash: md5
- md5: 5cbc24c928a073a9459428d4e1984ba1
- size: 426
- test_each_metric@ratio-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=sms_spam/logs/metric/ratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/ratio/score_dict.json
- hash: md5
- md5: b8ea7bf8de9af2250f1a2c84695be1f9
- size: 425
- test_each_metric@ratio-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/ratio.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/ratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/ratio/score_dict.json
- hash: md5
- md5: 5f9750a5729db8f4912f50a8610fc48c
- size: 429
- test_each_metric@hamming-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/hamming.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/hamming ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/hamming/score_dict.json
- hash: md5
- md5: ed699605a76c4116a461994f139da237
- size: 429
- test_each_metric@hamming-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=truthseeker/logs/metric/hamming ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/hamming/score_dict.json
- hash: md5
- md5: 8a3f87734f208a61bc27114729fd4fd6
- size: 432
- test_each_metric@hamming-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=sms_spam/logs/metric/hamming ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/hamming/score_dict.json
- hash: md5
- md5: 0c0988090568dc526d0137ff7e38ca6a
- size: 428
- test_each_metric@hamming-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/hamming.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/hamming ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/hamming/score_dict.json
- hash: md5
- md5: 949f7ea27f2521fbbb2b05ec3a111346
- size: 428
- test_each_metric@jaro-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/jaro/score_dict.json
- hash: md5
- md5: 3bd4e5c89097070d439c3f13359ff369
- size: 428
- test_each_metric@jaro-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=truthseeker/logs/metric/jaro ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/jaro/score_dict.json
- hash: md5
- md5: b86d70f18ea7ee85132f4d8407058d60
- size: 429
- test_each_metric@jaro-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=sms_spam/logs/metric/jaro ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/jaro/score_dict.json
- hash: md5
- md5: b7550248d10852d10a16610f707ea50f
- size: 429
- test_each_metric@jaro-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/jaro/score_dict.json
- hash: md5
- md5: e7987cb2d248f7eaa20a842bbcacc442
- size: 430
- test_each_metric@jaro_winkler-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro_winkler.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro_winkler ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/jaro_winkler/score_dict.json
- hash: md5
- md5: a44e09663d05f8330352712ccfd72f17
- size: 428
- test_each_metric@jaro_winkler-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=truthseeker/logs/metric/jaro_winkler ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/jaro_winkler/score_dict.json
- hash: md5
- md5: 2a80298804f36bc7af477e11ff9f6679
- size: 428
- test_each_metric@jaro_winkler-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=sms_spam/logs/metric/jaro_winkler ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: sms_spam/reports/train/jaro_winkler/score_dict.json
- hash: md5
- md5: 8b7d0f92e14d74042fb8cd907e3a8274
- size: 430
- test_each_metric@jaro_winkler-ddos:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro_winkler.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro_winkler ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/reports/train/jaro_winkler/score_dict.json
- hash: md5
- md5: aa4130c79130ddbaaebaa35a1cae7d91
- size: 426
- test_each_metric@seqratio-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/seqratio.npz
- hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/seqratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/reports/train/seqratio/score_dict.json
- hash: md5
- md5: 9075115a02136aaa59bd87074589ce42
- size: 430
- test_each_metric@seqratio-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=truthseeker/logs/metric/seqratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/reports/train/seqratio/score_dict.json
- hash: md5
- md5: ac2bdff9261ce4c9e511294dd69b19f8
- size: 434
- test_each_metric@seqratio-sms_spam:
- cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8
- hydra.run.dir=sms_spam/logs/metric/seqratio ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 14173762472fe294a1d3228b4ee83d4b
- size: 431
- - path: params.yaml
- hash: md5
- md5: 4999b48c21cb63a45801003d03576594
- size: 2082
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 2
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 10
- train_size: 10
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix:
- k: 1
- m: -1
- method:
- metric: ncd
- test_each_method@ddos-random:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3
- data.sample.train_size=100 files.name=random files.directory=ddos data=ddos
- dataset=ddos model_name=random hydra.run.dir=ddos/logs/method/random ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/random
- hash: md5
- md5: 3bfcc27fd44bf9333be7081f3fceb94c.dir
- size: 8340
- nfiles: 4
- - path: ddos/reports/train/random/score_dict.json
- hash: md5
- md5: 218449c8e2b7425707008d01e751eee4
- size: 281
- test_each_method@ddos-medoid:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=3
- data.sample.train_size=100 files.name=medoid files.directory=ddos data=ddos
- dataset=ddos model_name=medoid hydra.run.dir=ddos/logs/method/medoid ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/medoid
- hash: md5
- md5: cab03f71d3883157c103a207662f0f01.dir
- size: 8377
- nfiles: 4
- - path: ddos/reports/train/medoid/score_dict.json
- hash: md5
- md5: eb281dc186936044bcf39edf3b5c2a97
- size: 283
- test_each_method@ddos-sum:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3
- data.sample.train_size=100 files.name=sum files.directory=ddos data=ddos dataset=ddos
- model_name=sum hydra.run.dir=ddos/logs/method/sum ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/sum
- hash: md5
- md5: 1acd35c26f1f01c1d97695be4df4be9f.dir
- size: 8320
- nfiles: 4
- - path: ddos/reports/train/sum/score_dict.json
- hash: md5
- md5: d8ee90602dcf3e5e3d1541fd051d8c25
- size: 283
- test_each_method@ddos-svc:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3
- data.sample.train_size=100 files.name=svc files.directory=ddos data=ddos dataset=ddos
- model_name=svc hydra.run.dir=ddos/logs/method/svc ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/svc
- hash: md5
- md5: ff1e2d4db8fbd074fae27c28e6d7efab.dir
- size: 8317
- nfiles: 4
- - path: ddos/reports/train/svc/score_dict.json
- hash: md5
- md5: 02086eaaafb2de9549a587e0cac8d44f
- size: 280
- test_each_method@ddos-condensed:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=condensed model.init.m=1
- files.name=condensed files.directory=ddos data=ddos dataset=ddos model_name=condensed
- hydra.run.dir=ddos/logs/method/condensed ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/condensed
- hash: md5
- md5: 5dfc9ebfe1c6f3e496814c86a05a5329.dir
- size: 10117
- nfiles: 4
- - path: ddos/reports/train/condensed/score_dict.json
- hash: md5
- md5: 56bcddf54558d9cdd1a7587878aceffa
- size: 284
- test_each_method@ddos-hardness:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3
- data.sample.train_size=100 files.name=hardness files.directory=ddos data=ddos
- dataset=ddos model_name=hardness hydra.run.dir=ddos/logs/method/hardness ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/hardness
- hash: md5
- md5: 92679e897538c5e98e89f11ca456f483.dir
- size: 8413
- nfiles: 4
- - path: ddos/reports/train/hardness/score_dict.json
- hash: md5
- md5: 24a77200255cec8b4ec9f1877188fdda
- size: 281
- test_each_method@ddos-nearmiss:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3
- data.sample.train_size=100 files.name=nearmiss files.directory=ddos data=ddos
- dataset=ddos model_name=nearmiss hydra.run.dir=ddos/logs/method/nearmiss ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/nearmiss
- hash: md5
- md5: 84fc6455a5c576fa04c36919c33ae8fd.dir
- size: 8416
- nfiles: 4
- - path: ddos/reports/train/nearmiss/score_dict.json
- hash: md5
- md5: b4602181657a738a97631883018e221a
- size: 284
- test_each_method@truthseeker-svc:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3
- data.sample.train_size=100 files.name=svc files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=svc hydra.run.dir=truthseeker/logs/method/svc
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/svc
- hash: md5
- md5: 7f9ad95f5b5a7d8ea8a41d09560bca7e.dir
- size: 10252
- nfiles: 4
- - path: truthseeker/reports/train/svc/score_dict.json
- hash: md5
- md5: dca27d752d8d9db2b52a61d9e0d9bebf
- size: 283
- test_each_method@truthseeker-medoid:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=3
- data.sample.train_size=100 files.name=medoid files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=medoid hydra.run.dir=truthseeker/logs/method/medoid
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/medoid
- hash: md5
- md5: 57b1e2e154ae8653331898992d0d7f7c.dir
- size: 10316
- nfiles: 4
- - path: truthseeker/reports/train/medoid/score_dict.json
- hash: md5
- md5: a728020aeb632257e52cc9b13337870e
- size: 284
- test_each_method@truthseeker-sum:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3
- data.sample.train_size=100 files.name=sum files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=sum hydra.run.dir=truthseeker/logs/method/sum
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/sum
- hash: md5
- md5: b8934f0660e9e6043e5a7117d2e3d462.dir
- size: 10252
- nfiles: 4
- - path: truthseeker/reports/train/sum/score_dict.json
- hash: md5
- md5: 0a4117f35aab6ec4b41ac526f8715aa2
- size: 283
- test_each_method@truthseeker-random:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3
- data.sample.train_size=100 files.name=random files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=random hydra.run.dir=truthseeker/logs/method/random
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/random
- hash: md5
- md5: a77f4e67f85e529063b18617cda5525a.dir
- size: 10289
- nfiles: 4
- - path: truthseeker/reports/train/random/score_dict.json
- hash: md5
- md5: 08f3cc499d61caaa4ab912af1a2ff558
- size: 283
- test_each_method@truthseeker-nearmiss:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3
- data.sample.train_size=100 files.name=nearmiss files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=nearmiss hydra.run.dir=truthseeker/logs/method/nearmiss
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/nearmiss
- hash: md5
- md5: 6ea3f0a574d7abd052e3ee5466356e13.dir
- size: 10359
- nfiles: 4
- - path: truthseeker/reports/train/nearmiss/score_dict.json
- hash: md5
- md5: f03918d65cac7f21e210a14be8ee1373
- size: 285
- test_each_method@truthseeker-hardness:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3
- data.sample.train_size=100 files.name=hardness files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=hardness hydra.run.dir=truthseeker/logs/method/hardness
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/hardness
- hash: md5
- md5: c5ea09925ae34a0fee42f1ec06d88090.dir
- size: 10355
- nfiles: 4
- - path: truthseeker/reports/train/hardness/score_dict.json
- hash: md5
- md5: 87bdbb0cafd4462b87035af79efc81c5
- size: 281
- test_each_method@truthseeker-knn:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3
- data.sample.train_size=100 files.name=knn files.directory=truthseeker data=truthseeker
- dataset=truthseeker model_name=knn hydra.run.dir=truthseeker/logs/method/knn
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 064e5bb42979e36c917c538b2a7bc0cc
- size: 489
- - path: params.yaml
- hash: md5
- md5: 8e937140db56a135e97c05461c573520
- size: 1345
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/method/knn
- hash: md5
- md5: 5c5fe8f17151816b01d863f51db3d01a.dir
- size: 10254
- nfiles: 4
- - path: truthseeker/reports/train/knn/score_dict.json
- hash: md5
- md5: 4157a5deabda43d207a543b9f038b5af
- size: 285
- test_each_method@ddos-knn:
- cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3
- data.sample.train_size=100 files.name=knn files.directory=ddos data=ddos dataset=ddos
- model_name=knn hydra.run.dir=ddos/logs/method/knn ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: ddos/logs/method/knn
- hash: md5
- md5: 8d73125fea91a47efc49ba2b4a68e1fe.dir
- size: 8319
- nfiles: 4
- - path: ddos/reports/train/knn/score_dict.json
- hash: md5
- md5: fb77e1c8e53bac0e077d2140f1abc6d6
- size: 282
- condense@sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.test_size=100 model_name=gzip_logistic model=gzip_logistic hydra.sweeper.study_name=condense_gzip_logistic_sms_spam
- hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=sms_spam/logs/condense/gzip_logistic/
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/study.csv
- ++data.sample.train_size='int(interval(30, 1000))' ++data.sample.random_state='int(interval(10000,
- 20000))' ++data.sample.stratify=True model.init.m='tag(log, interval(.1, 1))'
- +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn files.directory=sms_spam
- files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=32 --config-name
- gzip_logistic --multirun
- deps:
- - path: conf/model/best_gzip_logistic_sms_spam.yaml
- hash: md5
- md5: 026fca7fe5d7bb75c4a3ae245f86a2c2
- size: 332
- - path: sms_spam/logs/method/
- hash: md5
- md5: e8e327bbd5859a6c1c362fd482435727.dir
- size: 69377
- nfiles: 24
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: int(interval(20, 1000))
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.1, 1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: sms_spam/logs/condense/gzip_logistic/
- hash: md5
- md5: 9496098bd1497b6c46124e40e665ee74.dir
- size: 14280
- nfiles: 5
- - path: sms_spam/reports/condense/gzip_logistic/
- hash: md5
- md5: c7e2a43c1dc170c3d593825f57ad0e9b.dir
- size: 2707
- nfiles: 3
- condense@truthseeker-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.test_size=100 model_name=gzip_svc model=gzip_svc hydra.sweeper.study_name=condense_gzip_svc_truthseeker
- hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=truthseeker/logs/condense/gzip_svc/
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/study.csv ++data.sample.train_size='int(interval(30,
- 1000))' ++data.sample.random_state='int(interval(10000, 20000))' ++data.sample.stratify=True
- model.init.m='tag(log, interval(.1, 1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=truthseeker files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=32
- --config-name gzip_svc --multirun
- deps:
- - path: conf/model/best_gzip_svc_truthseeker.yaml
- hash: md5
- md5: 97d9d5857744b1cc077513ac5a659f62
- size: 302
- - path: truthseeker/logs/method/
- hash: md5
- md5: 6f6693db2bb9520dc7956f0d0c003e23.dir
- size: 116543
- nfiles: 44
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: int(interval(20, 1000))
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.1, 1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: truthseeker/logs/condense/gzip_svc/
- hash: md5
- md5: bd7cbae34fd6feecf60a49cb537b0f80.dir
- size: 13751
- nfiles: 5
- - path: truthseeker/reports/condense/gzip_svc/
- hash: md5
- md5: a24584cdc3464b86b6ff88b90dc62e5e.dir
- size: 2701
- nfiles: 3
- condense@sms_spam-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.test_size=100 model_name=gzip_svc model=best_gzip_svc_sms_spam hydra.sweeper.study_name=condense_gzip_svc_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/gzip_svc/
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=sms_spam files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_svc_sms_spam.yaml
- hash: md5
- md5: 771cd8e3b1368f0fbb30e518002db80f
- size: 317
- - path: sms_spam/logs/method/
- hash: md5
- md5: e8e327bbd5859a6c1c362fd482435727.dir
- size: 69377
- nfiles: 24
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: sms_spam/logs/condense/gzip_svc/
- hash: md5
- md5: c6ef4ecf2bec03894b2f2018cffc0888.dir
- size: 1597147
- nfiles: 513
- - path: sms_spam/reports/condense/gzip_svc/
- hash: md5
- md5: aff4ca5c41e7043fe0d36b4a669ad6a7.dir
- size: 344414
- nfiles: 381
- condense@ddos-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100
- model_name=gzip_svc model=best_gzip_svc_ddos hydra.sweeper.study_name=condense_gzip_svc_ddos
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_svc/
- hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=ddos files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16
- ++raise_exception=True --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_svc_ddos.yaml
- hash: md5
- md5: f2ec5b2ff8103b93ca61a5b86888a3e6
- size: 305
- - path: ddos/logs/method/
- hash: md5
- md5: 7128c67930147170f54fb89880528199.dir
- size: 120518
- nfiles: 48
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: ddos/logs/condense/gzip_svc/
- hash: md5
- md5: 98f11cc76f9f370871bfb325ec4186e4.dir
- size: 1589126
- nfiles: 513
- - path: ddos/reports/condense/gzip_svc/
- hash: md5
- md5: 87ca8778bbdb8363a1e237019c87ebf5.dir
- size: 345583
- nfiles: 384
- condense@sms_spam-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_sms_spam hydra.sweeper.study_name=condense_gzip_knn_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/gzip_knn/
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=sms_spam files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_knn_sms_spam.yaml
- hash: md5
- md5: 430e2be20ddaa39808a6739627a98d77
- size: 259
- - path: sms_spam/logs/method/
- hash: md5
- md5: e8e327bbd5859a6c1c362fd482435727.dir
- size: 69377
- nfiles: 24
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: sms_spam/logs/condense/gzip_knn/
- hash: md5
- md5: a45625dcc1d1cc1f1e20d19440e1cdf1.dir
- size: 1559584
- nfiles: 513
- - path: sms_spam/reports/condense/gzip_knn/
- hash: md5
- md5: 0ac87faa8d16d77b4e7d5a96cfdde177.dir
- size: 335094
- nfiles: 384
- compile@sms_spam-gzip_knn:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_knn --results_file
- sms_spam/reports/gzip_knn.csv
- deps:
- - path: sms_spam/reports/gzip_knn/
- hash: md5
- md5: 89e3b68400367dee648064784adb9796.dir
- size: 1499301
- nfiles: 1337
- outs:
- - path: sms_spam/reports/gzip_knn.csv
- hash: md5
- md5: ee7ee47f5ee27acca9e58b9249ecb954
- size: 695526
- compile@truthseeker-gzip_knn:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_knn --results_file
- truthseeker/reports/gzip_knn.csv
- deps:
- - path: truthseeker/reports/gzip_knn/
- hash: md5
- md5: e5702237f62021b85240717035b53d81.dir
- size: 1537318
- nfiles: 1325
- outs:
- - path: truthseeker/reports/gzip_knn.csv
- hash: md5
- md5: 183afe36078f60e3e478f3813b1b52a7
- size: 711959
- compile@kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_knn --results_file
- kdd_nsl/reports/gzip_knn.csv
- deps:
- - path: kdd_nsl/reports/gzip_knn/
- hash: md5
- md5: 4dfe630ff7f6f036220f2b9aa5b3c6b1.dir
- size: 4225577
- nfiles: 3608
- outs:
- - path: kdd_nsl/reports/gzip_knn.csv
- hash: md5
- md5: 17f27e4404093a5b50a74ca0af24e4db
- size: 1964725
- compile@truthseeker-gzip_svc:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_svc --results_file
- truthseeker/reports/gzip_svc.csv
- deps:
- - path: truthseeker/reports/gzip_svc/
- hash: md5
- md5: e6e273bb143c7a8949d5be4acca87eb9.dir
- size: 1536370
- nfiles: 1725
- outs:
- - path: truthseeker/reports/gzip_svc.csv
- hash: md5
- md5: 746aae81f4af3c8ce4c8c7e3c3e866b1
- size: 870818
- compile@truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_logistic --results_file
- truthseeker/reports/gzip_logistic.csv
- deps:
- - path: truthseeker/reports/gzip_logistic/
- hash: md5
- md5: 5074027dccab644424973514ae7c8922.dir
- size: 2225784
- nfiles: 1473
- outs:
- - path: truthseeker/reports/gzip_logistic.csv
- hash: md5
- md5: ed858c429ea35f3dac4eca9c52e036ce
- size: 786129
- compile@ddos-gzip_logistic:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_logistic --results_file
- ddos/reports/gzip_logistic.csv
- deps:
- - path: ddos/reports/gzip_logistic/
- hash: md5
- md5: 6ce8a2aa8cc08ccde4467403dec1a124.dir
- size: 6278656
- nfiles: 4845
- outs:
- - path: ddos/reports/gzip_logistic.csv
- hash: md5
- md5: 7ff452295887d9c84250c7375b7ea58a
- size: 2606734
- compile@ddos-gzip_knn:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_knn --results_file
- ddos/reports/gzip_knn.csv
- deps:
- - path: ddos/reports/gzip_knn/
- hash: md5
- md5: ce89d46c7a34959f9d39a3d1e6ad8911.dir
- size: 5724814
- nfiles: 5690
- outs:
- - path: ddos/reports/gzip_knn.csv
- hash: md5
- md5: fe28ae14c5cc37ee8eb5e705c3610da8
- size: 2899113
- compile@kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_logistic --results_file
- kdd_nsl/reports/gzip_logistic.csv
- deps:
- - path: kdd_nsl/reports/gzip_logistic/
- hash: md5
- md5: bca1b51ebae4e3ef166f9424a0f8c1ff.dir
- size: 4923952
- nfiles: 3945
- outs:
- - path: kdd_nsl/reports/gzip_logistic.csv
- hash: md5
- md5: 07859f070e6b9246456e860d63ab4438
- size: 2149350
- compile@kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_svc --results_file
- kdd_nsl/reports/gzip_svc.csv
- deps:
- - path: kdd_nsl/reports/gzip_svc/
- hash: md5
- md5: 907ec439b02a0d2b3ba36d54e250ff89.dir
- size: 4798455
- nfiles: 4393
- outs:
- - path: kdd_nsl/reports/gzip_svc.csv
- hash: md5
- md5: b25b5925936e935b62cdc6bd5b96d8d3
- size: 2257942
- compile@sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_logistic --results_file
- sms_spam/reports/gzip_logistic.csv
- deps:
- - path: sms_spam/reports/gzip_logistic/
- hash: md5
- md5: c70a60ca7e7e433d1cbd21bfddd26320.dir
- size: 2212768
- nfiles: 1438
- outs:
- - path: sms_spam/reports/gzip_logistic.csv
- hash: md5
- md5: 34643e6fbb37caef6b6f9054cb1b5203
- size: 754980
- compile@ddos-gzip_svc:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_svc --results_file
- ddos/reports/gzip_svc.csv
- deps:
- - path: ddos/reports/gzip_svc/
- hash: md5
- md5: 3b3fdb3e3d2321e8ee5dc36311626231.dir
- size: 6101649
- nfiles: 5283
- outs:
- - path: ddos/reports/gzip_svc.csv
- hash: md5
- md5: 7bd491b47bf7d5f373cb825e9e3d0c4c
- size: 2689051
- compile@sms_spam-gzip_svc:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_svc --results_file
- sms_spam/reports/gzip_svc.csv
- deps:
- - path: sms_spam/reports/gzip_svc/
- hash: md5
- md5: 52af2b025a2aafa3e4a78db0bf221f59.dir
- size: 2173475
- nfiles: 1536
- outs:
- - path: sms_spam/reports/gzip_svc.csv
- hash: md5
- md5: 12c2eec80495a5fb326dbed7c4cfe382
- size: 758618
- clean@truthseeker-gzip_svc:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_svc.csv
- -o truthseeker/plots/clean/gzip_svc.csv -c conf/clean.yaml
- deps:
- - path: truthseeker/reports/gzip_svc.csv
- hash: md5
- md5: 746aae81f4af3c8ce4c8c7e3c3e866b1
- size: 870818
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: truthseeker/plots/clean/gzip_svc.csv
- hash: md5
- md5: cdb96b7ba00dc0bf6b4c8db38311447b
- size: 679004
- clean@kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_svc.csv -o kdd_nsl/plots/clean/gzip_svc.csv
- -c conf/clean.yaml
- deps:
- - path: kdd_nsl/reports/gzip_svc.csv
- hash: md5
- md5: b25b5925936e935b62cdc6bd5b96d8d3
- size: 2257942
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: kdd_nsl/plots/clean/gzip_svc.csv
- hash: md5
- md5: a359fb46b83265dec352e0af17f19cb2
- size: 1771361
- clean@kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_knn.csv -o kdd_nsl/plots/clean/gzip_knn.csv
- -c conf/clean.yaml
- deps:
- - path: kdd_nsl/reports/gzip_knn.csv
- hash: md5
- md5: 17f27e4404093a5b50a74ca0af24e4db
- size: 1964725
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: kdd_nsl/plots/clean/gzip_knn.csv
- hash: md5
- md5: 686b0f04494630491244a6ead99949b7
- size: 996268
- clean@ddos-gzip_knn:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_knn.csv -o ddos/plots/clean/gzip_knn.csv
- -c conf/clean.yaml
- deps:
- - path: ddos/reports/gzip_knn.csv
- hash: md5
- md5: fe28ae14c5cc37ee8eb5e705c3610da8
- size: 2899113
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: ddos/plots/clean/gzip_knn.csv
- hash: md5
- md5: ad6773d0af82535d3c525f8bf405bbfe
- size: 1919757
- clean@ddos-gzip_svc:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_svc.csv -o ddos/plots/clean/gzip_svc.csv
- -c conf/clean.yaml
- deps:
- - path: ddos/reports/gzip_svc.csv
- hash: md5
- md5: 7bd491b47bf7d5f373cb825e9e3d0c4c
- size: 2689051
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: ddos/plots/clean/gzip_svc.csv
- hash: md5
- md5: 45515bad8f1a4167a7a64d0a3d62464e
- size: 1842449
- clean@kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_logistic.csv
- -o kdd_nsl/plots/clean/gzip_logistic.csv -c conf/clean.yaml
- deps:
- - path: kdd_nsl/reports/gzip_logistic.csv
- hash: md5
- md5: 07859f070e6b9246456e860d63ab4438
- size: 2149350
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: kdd_nsl/plots/clean/gzip_logistic.csv
- hash: md5
- md5: 82d8bddbe4db8eb6835d00931af7fc12
- size: 1456814
- clean@truthseeker-gzip_knn:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_knn.csv
- -o truthseeker/plots/clean/gzip_knn.csv -c conf/clean.yaml
- deps:
- - path: truthseeker/reports/gzip_knn.csv
- hash: md5
- md5: 183afe36078f60e3e478f3813b1b52a7
- size: 711959
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: truthseeker/plots/clean/gzip_knn.csv
- hash: md5
- md5: dbbbb4c6ab13f540b1b4d9ee23d4a91a
- size: 354842
- clean@ddos-gzip_logistic:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_logistic.csv -o
- ddos/plots/clean/gzip_logistic.csv -c conf/clean.yaml
- deps:
- - path: ddos/reports/gzip_logistic.csv
- hash: md5
- md5: 7ff452295887d9c84250c7375b7ea58a
- size: 2606734
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: ddos/plots/clean/gzip_logistic.csv
- hash: md5
- md5: a7d5cf7362711724ae19bba3becf66d2
- size: 1523208
- clean@sms_spam-gzip_knn:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_knn.csv -o
- sms_spam/plots/clean/gzip_knn.csv -c conf/clean.yaml
- deps:
- - path: sms_spam/reports/gzip_knn.csv
- hash: md5
- md5: ee7ee47f5ee27acca9e58b9249ecb954
- size: 695526
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: sms_spam/plots/clean/gzip_knn.csv
- hash: md5
- md5: 020bbec4f2594935bd33efdcdf90eba7
- size: 358497
- clean@sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_logistic.csv
- -o sms_spam/plots/clean/gzip_logistic.csv -c conf/clean.yaml
- deps:
- - path: sms_spam/reports/gzip_logistic.csv
- hash: md5
- md5: 34643e6fbb37caef6b6f9054cb1b5203
- size: 754980
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: sms_spam/plots/clean/gzip_logistic.csv
- hash: md5
- md5: d9a1be37cfb498a7d87c116db6f553e2
- size: 497702
- clean@sms_spam-gzip_svc:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_svc.csv -o
- sms_spam/plots/clean/gzip_svc.csv -c conf/clean.yaml
- deps:
- - path: sms_spam/reports/gzip_svc.csv
- hash: md5
- md5: 12c2eec80495a5fb326dbed7c4cfe382
- size: 758618
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: sms_spam/plots/clean/gzip_svc.csv
- hash: md5
- md5: 4455964d2014f4705b4ea3191cef40b2
- size: 588874
- clean@truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_logistic.csv
- -o truthseeker/plots/clean/gzip_logistic.csv -c conf/clean.yaml
- deps:
- - path: truthseeker/reports/gzip_logistic.csv
- hash: md5
- md5: 276fcd9d025d60418d6a92db6bee859e
- size: 748894
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: truthseeker/plots/clean/gzip_logistic.csv
- hash: md5
- md5: 82450f3b94f517f586b35ed85b494add
- size: 417258
- copy@sms_spam:
- cmd: 'rm -rf ~/Gzip-KNN/figs/sms_spam/ && mkdir -p ~/Gzip-KNN/figs/sms_spam/ &&
- cp -r sms_spam/plots/* ~/Gzip-KNN/figs/sms_spam/ '
- deps:
- - path: sms_spam/plots/
- hash: md5
- md5: b4562b1ad06e680bf0247d4e8dab85c1.dir
- size: 10160120
- nfiles: 19
- copy@truthseeker:
- cmd: 'rm -rf ~/Gzip-KNN/figs/truthseeker/ && mkdir -p ~/Gzip-KNN/figs/truthseeker/
- && cp -r truthseeker/plots/* ~/Gzip-KNN/figs/truthseeker/ '
- deps:
- - path: truthseeker/plots/
- hash: md5
- md5: 47a062972487c796e962fa241d4bf108.dir
- size: 8761443
- nfiles: 18
- copy@kdd_nsl:
- cmd: 'rm -rf ~/Gzip-KNN/figs/kdd_nsl/ && mkdir -p ~/Gzip-KNN/figs/kdd_nsl/ &&
- cp -r kdd_nsl/plots/* ~/Gzip-KNN/figs/kdd_nsl/ '
- deps:
- - path: kdd_nsl/plots/
- hash: md5
- md5: 526bfd7a3ffd1b1cee332632d79a96f8.dir
- size: 13281984
- nfiles: 18
- copy@ddos:
- cmd: 'rm -rf ~/Gzip-KNN/figs/ddos/ && mkdir -p ~/Gzip-KNN/figs/ddos/ && cp -r
- ddos/plots/* ~/Gzip-KNN/figs/ddos/ '
- deps:
- - path: ddos/plots/
- hash: md5
- md5: 22ac4455d4f24b7a0624f5d670f81e24.dir
- size: 15551940
- nfiles: 19
- condense@truthseeker-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_truthseeker
- hydra.sweeper.study_name=condense_gzip_knn_truthseeker hydra.sweeper.n_trials=128
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/study.csv
- model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=truthseeker files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_knn_truthseeker.yaml
- hash: md5
- md5: 79baf4709c4a5f2535059ef8d1b6a082
- size: 258
- - path: truthseeker/logs/method/
- hash: md5
- md5: 6f6693db2bb9520dc7956f0d0c003e23.dir
- size: 116543
- nfiles: 44
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: truthseeker/logs/condense/gzip_knn/
- hash: md5
- md5: 029aa9a618d0edd127756b0b724a1742.dir
- size: 1568426
- nfiles: 513
- - path: truthseeker/reports/condense/gzip_knn/
- hash: md5
- md5: ef4ee3a0a4c954cea9b4f557a216e421.dir
- size: 353591
- nfiles: 374
- plot@ddos-gzip_knn:
- cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/clean_gzip_knn.csv -c
- conf/plots.yaml
- deps:
- - path: ddos/plots/clean_gzip_knn.csv
- hash: md5
- md5: c730af75faf35ba958b15b2da82b25be
- size: 451405
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: train_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: predict_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- outs:
- - path: ddos/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: b8279045dcf3a1fc574578e991427e73
- size: 23629
- - path: ddos/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: 1512c0c363753afc28a3c093cc8e252c
- size: 22784
- - path: ddos/plots/metric_vs_train_time.pdf
- hash: md5
- md5: dd17a922e53b59b2d9b2d91c1237bb54
- size: 22329
- - path: ddos/plots/models_vs_accuracy.pdf
- hash: md5
- md5: bbbc08a7161735b6174984688003809f
- size: 13970
- - path: ddos/plots/models_vs_predict_time.pdf
- hash: md5
- md5: 9b6acd61045df87af51626be2bdff7ab
- size: 15507
- - path: ddos/plots/models_vs_train_time.pdf
- hash: md5
- md5: 6a2303b531dfc78f20d9bf3dc62d8d42
- size: 16118
- - path: ddos/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 9cd54391a16400664710c9f0589a4d5f
- size: 22044
- - path: ddos/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 0397f39f681300638b6fcc7c2d4e3bda
- size: 21616
- plot@kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/clean_gzip_knn.csv -c
- conf/plots.yaml
- deps:
- - path: kdd_nsl/plots/clean_gzip_knn.csv
- hash: md5
- md5: 1c001f5a7008b439ee4c7946998cbe25
- size: 1002255
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: train_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: predict_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- outs:
- - path: kdd_nsl/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: a146ab8c45d548ecd6c285a40c5d49e7
- size: 23228
- - path: kdd_nsl/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: 59f7befb701cf34c5bf62a78206d7867
- size: 22642
- - path: kdd_nsl/plots/metric_vs_train_time.pdf
- hash: md5
- md5: 938036a897293cbf7dc0b4caa19a5596
- size: 22182
- - path: kdd_nsl/plots/models_vs_accuracy.pdf
- hash: md5
- md5: 0dad2f21fc6049c3a24972a35514ee71
- size: 15035
- - path: kdd_nsl/plots/models_vs_predict_time.pdf
- hash: md5
- md5: 4361ffb492bff25d3cde95fcdb941ced
- size: 16578
- - path: kdd_nsl/plots/models_vs_train_time.pdf
- hash: md5
- md5: 416681afbf2e0e87dcc7dfe97f0835fc
- size: 16239
- - path: kdd_nsl/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 05a28fb9adea7b847f396fdd96c37d02
- size: 22208
- - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 0a0a9daf98ab6efe98cb31b69cba2c65
- size: 21578
- plot@truthseeker-gzip_knn:
- cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/clean_gzip_knn.csv -c
- conf/plots.yaml
- deps:
- - path: truthseeker/plots/clean_gzip_knn.csv
- hash: md5
- md5: ff0162ac672b57d59126b965580901d9
- size: 620009
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: train_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: predict_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- outs:
- - path: truthseeker/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 3cef9a04adf0d3378d4627c1a8b097a7
- size: 23348
- - path: truthseeker/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: a4a5f2426ffaf289e124fb09235e374b
- size: 22838
- - path: truthseeker/plots/metric_vs_train_time.pdf
- hash: md5
- md5: cda8914da9fabcfb40ea1eb0943e28d3
- size: 22333
- - path: truthseeker/plots/models_vs_accuracy.pdf
- hash: md5
- md5: 7ef865e460d2652c873cfe333e7a308d
- size: 15215
- - path: truthseeker/plots/models_vs_predict_time.pdf
- hash: md5
- md5: eb57bd848d231a47615e311dbd1102b1
- size: 17930
- - path: truthseeker/plots/models_vs_train_time.pdf
- hash: md5
- md5: e7bc6589ed86e8f5c3fbb5a747c652fe
- size: 17739
- - path: truthseeker/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 0e7c326bef4c0d835f810c67172b2698
- size: 22057
- - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: ce21956e382cc48f0a71ef7ccfd79751
- size: 21593
- plot@sms_spam-gzip_knn:
- cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/clean_gzip_knn.csv -c
- conf/plots.yaml
- deps:
- - path: sms_spam/plots/clean_gzip_knn.csv
- hash: md5
- md5: 13a5803849f7dfdefe18ba16b0a5010f
- size: 448070
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: train_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: predict_time
- hue: dataset
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- outs:
- - path: sms_spam/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 507715814c07145dbb140b2b6714973b
- size: 23499
- - path: sms_spam/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: 97ec5498625837c79fc02850afba09f1
- size: 22606
- - path: sms_spam/plots/metric_vs_train_time.pdf
- hash: md5
- md5: f4c9d0805ea5d0ac8e1a2210ee788d47
- size: 22104
- - path: sms_spam/plots/models_vs_accuracy.pdf
- hash: md5
- md5: 2eb9ea23cba4e5b734565b7aacdcf43c
- size: 14146
- - path: sms_spam/plots/models_vs_predict_time.pdf
- hash: md5
- md5: b518bf6b070e7916ad71febd3d3face6
- size: 15523
- - path: sms_spam/plots/models_vs_train_time.pdf
- hash: md5
- md5: 617f20892ba643f5c47077af63ae727f
- size: 14895
- - path: sms_spam/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 40aa8607331327c4f667fda367defb5f
- size: 22033
- - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 775c0bdfc7d9524f1e63b8879ddefccd
- size: 21590
- merge@truthseeker:
- cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/gzip_knn.csv
- --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder
- truthseeker/plots --output_file merged.csv
- deps:
- - path: truthseeker/plots/clean/gzip_knn.csv
- hash: md5
- md5: 1f46e4b3afd875ee11939b23bd1c0852
- size: 560551
- - path: truthseeker/plots/clean/gzip_logistic.csv
- hash: md5
- md5: 82450f3b94f517f586b35ed85b494add
- size: 417258
- - path: truthseeker/plots/clean/gzip_svc.csv
- hash: md5
- md5: cdb96b7ba00dc0bf6b4c8db38311447b
- size: 679004
- outs:
- - path: truthseeker/plots/merged.csv
- hash: md5
- md5: a9b4f71f4d7eccde5a901730969b0bb1
- size: 1711555
- merge@sms_spam:
- cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/gzip_knn.csv
- --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder
- sms_spam/plots --output_file merged.csv
- deps:
- - path: sms_spam/plots/clean/gzip_knn.csv
- hash: md5
- md5: 020bbec4f2594935bd33efdcdf90eba7
- size: 358497
- - path: sms_spam/plots/clean/gzip_logistic.csv
- hash: md5
- md5: d9a1be37cfb498a7d87c116db6f553e2
- size: 497702
- - path: sms_spam/plots/clean/gzip_svc.csv
- hash: md5
- md5: 4455964d2014f4705b4ea3191cef40b2
- size: 588874
- outs:
- - path: sms_spam/plots/merged.csv
- hash: md5
- md5: 3e3e63943b3d62dddc79e554cb691405
- size: 1492939
- merge@ddos:
- cmd: python merge.py --big_dir ddos/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file
- clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder ddos/plots --output_file
- merged.csv
- deps:
- - path: ddos/plots/clean/gzip_knn.csv
- hash: md5
- md5: ad6773d0af82535d3c525f8bf405bbfe
- size: 1919757
- - path: ddos/plots/clean/gzip_logistic.csv
- hash: md5
- md5: a7d5cf7362711724ae19bba3becf66d2
- size: 1523208
- - path: ddos/plots/clean/gzip_svc.csv
- hash: md5
- md5: 45515bad8f1a4167a7a64d0a3d62464e
- size: 1842449
- outs:
- - path: ddos/plots/merged.csv
- hash: md5
- md5: 2fd123789b3c749a653aa9c142d23858
- size: 5465498
- merge@kdd_nsl:
- cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file
- clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder kdd_nsl/plots --output_file
- merged.csv
- deps:
- - path: kdd_nsl/plots/clean/gzip_knn.csv
- hash: md5
- md5: 686b0f04494630491244a6ead99949b7
- size: 996268
- - path: kdd_nsl/plots/clean/gzip_logistic.csv
- hash: md5
- md5: 82d8bddbe4db8eb6835d00931af7fc12
- size: 1456814
- - path: kdd_nsl/plots/clean/gzip_svc.csv
- hash: md5
- md5: a359fb46b83265dec352e0af17f19cb2
- size: 1771361
- outs:
- - path: kdd_nsl/plots/merged.csv
- hash: md5
- md5: 7817c0dd6f149eb072f4a5c787fa9655
- size: 4361588
- plot@kdd_nsl:
- cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/merged.csv -c
- conf/plots.yaml
- deps:
- - path: kdd_nsl/plots/merged.csv
- hash: md5
- md5: 7817c0dd6f149eb072f4a5c787fa9655
- size: 4361588
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: boxen
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: kdd_nsl/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 2abfc1441c3515f07d2e28459e730a4f
- size: 24689
- - path: kdd_nsl/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: d91c94bf17617b79b2a417710efb9dfc
- size: 23239
- - path: kdd_nsl/plots/metric_vs_train_time.pdf
- hash: md5
- md5: d2c40b3e36886868c650917d02015be4
- size: 24227
- - path: kdd_nsl/plots/models_vs_accuracy.pdf
- hash: md5
- md5: c6807ba0356e42159d683a2b3ab610a9
- size: 23546
- - path: kdd_nsl/plots/models_vs_predict_time.pdf
- hash: md5
- md5: 2f6d79e1a5164884b87ef3f40bdafeeb
- size: 19370
- - path: kdd_nsl/plots/models_vs_train_time.pdf
- hash: md5
- md5: 30ed28915c3ff6de16fffbf8c6bdda45
- size: 18949
- - path: kdd_nsl/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 1d0bb7d03823bb54b5b12b50dbc6615c
- size: 22232
- - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 802d5119895198601ba2ee24b3cc9528
- size: 21618
- plot@truthseeker:
- cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/merged.csv -c
- conf/plots.yaml
- deps:
- - path: truthseeker/plots/merged.csv
- hash: md5
- md5: a9b4f71f4d7eccde5a901730969b0bb1
- size: 1711555
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: boxen
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: truthseeker/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 935a8c7365ac4b738a1ab222357db671
- size: 23824
- - path: truthseeker/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: d5095d1375ed12b1a9b9f8ce5bfee839
- size: 22984
- - path: truthseeker/plots/metric_vs_train_time.pdf
- hash: md5
- md5: c6dec8707d3da6a57eb64874b8489aa1
- size: 23404
- - path: truthseeker/plots/models_vs_accuracy.pdf
- hash: md5
- md5: c09acc549b30af58463a3a8af31b80d1
- size: 20437
- - path: truthseeker/plots/models_vs_predict_time.pdf
- hash: md5
- md5: ff7ffac5905b059ec6670c9220caf124
- size: 18153
- - path: truthseeker/plots/models_vs_train_time.pdf
- hash: md5
- md5: f48cdb573700e225810e4ed960768e57
- size: 17725
- - path: truthseeker/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 4b92b154563b9c13bb5f177d0e106002
- size: 22192
- - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 2013309b971cea5728652df1a18ece16
- size: 21586
- plot@sms_spam:
- cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/merged.csv -c
- conf/plots.yaml
- deps:
- - path: sms_spam/plots/merged.csv
- hash: md5
- md5: 3e3e63943b3d62dddc79e554cb691405
- size: 1492939
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: boxen
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: sms_spam/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 695e96d374959cef893859230a15f1a7
- size: 24667
- - path: sms_spam/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: 857505ffce8416303759a76cb29b26a3
- size: 23552
- - path: sms_spam/plots/metric_vs_train_time.pdf
- hash: md5
- md5: 98b34d861b84d36cb30f58c763445eb7
- size: 23637
- - path: sms_spam/plots/models_vs_accuracy.pdf
- hash: md5
- md5: 3d9cda5e091398ec195ff1c763fb0b5a
- size: 23033
- - path: sms_spam/plots/models_vs_predict_time.pdf
- hash: md5
- md5: 06ae4883133a4f2bb4c19f531c693fdd
- size: 19365
- - path: sms_spam/plots/models_vs_train_time.pdf
- hash: md5
- md5: f8af33a8abf0caf4fc83a69b6af565a0
- size: 18945
- - path: sms_spam/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 43b4f4865931fca59079491745c20f1c
- size: 22231
- - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 4f5b0a9ac3efe2e0daa225f79fe0e40c
- size: 21606
- plot@ddos:
- cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/merged.csv -c
- conf/plots.yaml
- deps:
- - path: ddos/plots/merged.csv
- hash: md5
- md5: 2fd123789b3c749a653aa9c142d23858
- size: 5465498
- params:
- conf/plots.yaml:
- cat_plot:
- - file: symmetric_vs_metric.pdf
- x: model.init.symmetric
- y: accuracy
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Accuracy
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- - file: symmetric_vs_metric_train_time.pdf
- x: model.init.symmetric
- y: train_time
- hue: model.init.metric
- errorbar: se
- kind: bar
- titles:
- xlabels: ''
- ylabels: Training Time (s)
- legend_title: Metrics
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_accuracy.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: boxen
- titles:
- xlabels: Model
- ylabels: Accuracy
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: linear
- ylim:
- - 0
- - 1
- rotation: 90
- - file: models_vs_train_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Training Time (s)
- legend_title: Samples
- rotation: 90
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- - file: models_vs_predict_time.pdf
- x: model_name
- y: accuracy
- hue: data.sample.train_size
- errorbar: se
- kind: bar
- titles:
- xlabels: Model
- ylabels: Prediction Time (s)
- legend_title: Samples
- legend:
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- set:
- yscale: log
- rotation: 90
- line_plot:
- - file: metric_vs_accuracy.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_train_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: metric_vs_predict_time.pdf
- hue: model.init.metric
- title:
- x: data.sample.train_size
- xlabel: Number of Training Samples
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: linear
- hue_order:
- - Gzip
- - Pickle
- - BZ2
- - Zstd
- - Lzma
- - Levenshtein
- - Ratio
- - Hamming
- - Jaro
- - Jaro-Winkler
- - SeqRatio
- errorbar: se
- err_style: bars
- xlim:
- - 10
- - 500
- legend:
- title: Metrics
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: ddos/plots/metric_vs_accuracy.pdf
- hash: md5
- md5: 3b2f9c2885d331a0cadd339177318f3f
- size: 24827
- - path: ddos/plots/metric_vs_predict_time.pdf
- hash: md5
- md5: 56c78e45d5932c61b339753810a6fed1
- size: 24347
- - path: ddos/plots/metric_vs_train_time.pdf
- hash: md5
- md5: 7ba195f1f39c450c7ebd9165eee97f32
- size: 22962
- - path: ddos/plots/models_vs_accuracy.pdf
- hash: md5
- md5: 4e5e04199aa08c3098632cf8fad2c744
- size: 23780
- - path: ddos/plots/models_vs_predict_time.pdf
- hash: md5
- md5: 41c0c84e0b3b737273692f10c366b275
- size: 19529
- - path: ddos/plots/models_vs_train_time.pdf
- hash: md5
- md5: 38dd71a6ac8cd50294d5b81bffd8425b
- size: 19106
- - path: ddos/plots/symmetric_vs_metric.pdf
- hash: md5
- md5: 72331f97089e5465a2df8a071f6dcf10
- size: 22223
- - path: ddos/plots/symmetric_vs_metric_train_time.pdf
- hash: md5
- md5: 3014b61ef7c5fe2e5276149ecd20625b
- size: 22143
- condense@truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.test_size=100 model_name=gzip_logistic model=best_gzip_logistic_truthseeker
- hydra.sweeper.study_name=condense_gzip_logistic_truthseeker hydra.sweeper.n_trials=128
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_logistic/
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/study.csv
- model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=truthseeker files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_logistic_truthseeker.yaml
- hash: md5
- md5: 448e12c542f48c074057e9374743d61e
- size: 326
- - path: truthseeker/logs/method/
- hash: md5
- md5: 6f6693db2bb9520dc7956f0d0c003e23.dir
- size: 116543
- nfiles: 44
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: truthseeker/logs/condense/gzip_logistic/
- hash: md5
- md5: 79d74a0dfe0486ada3f03b24c68973dc.dir
- size: 1576129
- nfiles: 513
- - path: truthseeker/reports/condense/gzip_logistic/
- hash: md5
- md5: 3de3011b1d96e4990111f5b1601e3b9d.dir
- size: 400559
- nfiles: 343
- condense@ddos-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100
- model_name=gzip_knn model=best_gzip_knn_ddos hydra.sweeper.study_name=condense_gzip_knn_ddos
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_knn/
- hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=ddos files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_knn_ddos.yaml
- hash: md5
- md5: 74721f3e7ab6096e246c486d6080e1ab
- size: 259
- - path: ddos/logs/method/
- hash: md5
- md5: 7128c67930147170f54fb89880528199.dir
- size: 120518
- nfiles: 48
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: ddos/logs/condense/gzip_knn/
- hash: md5
- md5: a2dc5aef876897f53c4076e4012b678a.dir
- size: 1542474
- nfiles: 513
- - path: ddos/reports/condense/gzip_knn/
- hash: md5
- md5: 781709e87f2e740f6a0f4e914ee9754f.dir
- size: 340848
- nfiles: 379
- condense@ddos-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100
- model_name=gzip_logistic model=best_gzip_logistic_ddos hydra.sweeper.study_name=condense_gzip_logistic_ddos
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_logistic/
- hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=ddos files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_logistic_ddos.yaml
- hash: md5
- md5: 9507b28fa5a18b501fe9d80ec33bed1c
- size: 334
- - path: ddos/logs/method/
- hash: md5
- md5: 7128c67930147170f54fb89880528199.dir
- size: 120518
- nfiles: 48
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: ddos/logs/condense/gzip_logistic/
- hash: md5
- md5: 4f8f846516837f0e7cd63c8911aff99a.dir
- size: 1623568
- nfiles: 513
- - path: ddos/reports/condense/gzip_logistic/
- hash: md5
- md5: 051b71717b4a7986a1965ebadf448838.dir
- size: 350870
- nfiles: 384
- condense@kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_kdd_nsl hydra.sweeper.study_name=condense_gzip_knn_kdd_nsl
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/gzip_knn/
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/study.csv model.init.m='tag(log,
- interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn
- files.directory=kdd_nsl files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16
- --config-name condense --multirun
- deps:
- - path: conf/model/best_gzip_knn_kdd_nsl.yaml
- hash: md5
- md5: 2697918626643d0136286367b83ee6b9
- size: 258
- - path: kdd_nsl/logs/method/
- hash: md5
- md5: de8764bbb2daa13261f3f5d1dff27a30.dir
- size: 79348
- nfiles: 28
- params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: ???
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 2
- direction: ${direction}
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: kdd_nsl/logs/condense/gzip_knn/
- hash: md5
- md5: 7d53f3534ceb486e6601d344562cfb32.dir
- size: 1564530
- nfiles: 513
- - path: kdd_nsl/reports/condense/gzip_knn/
- hash: md5
- md5: 7e5a283215281be3ee4189ebd5a6e3f1.dir
- size: 342924
- nfiles: 384
- parse_params:
- cmd: python -m deckard.layers.parse
- deps:
- - path: conf/data/default.yaml
- hash: md5
- md5: 86639d6672cfd9529dda3e2ae4036c01
- size: 22
- - path: conf/default.yaml
- hash: md5
- md5: a0a533f84a7ffce197e0db5439219faf
- size: 1504
- - path: conf/files/default.yaml
- hash: md5
- md5: 7a2df5f8b98699376c3fb4da05d70dea
- size: 306
- - path: conf/model/default.yaml
- hash: md5
- md5: 39dc7512b1d19fea54550b080d880153
- size: 27
- - path: conf/scorers/default.yaml
- hash: md5
- md5: d8d00e7d284ea68b1244743dfef8f00c
- size: 280
- outs:
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- test_each_metric@gzip-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/gzip/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20
- hash: md5
- md5: 6091388fcd68296e6ccd16f0955cba96.dir
- size: 7683
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/gzip/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@zstd-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/zstd/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=zstd model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20
- hash: md5
- md5: 704acd4e060b20b19dd8c6528ee42b02.dir
- size: 7683
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/zstd/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@pkl-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/pkl/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=pkl model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20
- hash: md5
- md5: 539ec713f43133226c23d088f60a66bf.dir
- size: 7668
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/pkl/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@bz2-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/bz2/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=bz2 model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20
- hash: md5
- md5: dc85f72896e274b978488f36ec121474.dir
- size: 7668
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/bz2/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@lzma-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/lzma/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=lzma model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20
- hash: md5
- md5: 3e929ed47c2f62267a513fcc9ac7faec.dir
- size: 7683
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/lzma/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@levenshtein-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/levenshtein/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=levenshtein model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20 ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20
- hash: md5
- md5: 6e719f5801c71fe88793e4a42fe47b68.dir
- size: 7767
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/levenshtein/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@ratio-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/ratio/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=ratio model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20 ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/plots.yaml:
+ cat_plot:
+ - file: symmetric_vs_compressor_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressor
+ ylabels: Accuracy
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_string_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Metrics
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: linear
+ - file: symmetric_vs_string_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: String Metrics
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_compressor_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - file: metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ - file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20
- hash: md5
- md5: c7917445640a277d2a898413a74442e3.dir
- size: 7677
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/ratio/20/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@hamming-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/hamming/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=hamming model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20 ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: sms_spam/plots/compressor_metric_vs_accuracy.pdf
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 5dffa574fee935f98ce74c5cd6058666
+ size: 21187
+ - path: sms_spam/plots/metric_vs_accuracy.pdf
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20
+ md5: b9f73f48c8c024650db938dd804cfb05
+ size: 24114
+ - path: sms_spam/plots/string_metric_vs_accuracy.pdf
hash: md5
- md5: 384b5ae13749ca9006486a64dd50faf0.dir
- size: 7707
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/hamming/20/score_dict.json
+ md5: 864db5ed7b357958078bdea3ba0bad42
+ size: 20486
+ - path: sms_spam/plots/symmetric_vs_compressor_metric.pdf
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_dataset@gzip_knn-kdd_nsl:
- cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn
- data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl
- model_name=gzip_knn model=gzip_knn hydra.run.dir=kdd_nsl/logs/test_each_dataset/gzip_knn
- ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ md5: 501f5407e6906966dcb8b0c277d44dc3
+ size: 21377
+ - path: sms_spam/plots/symmetric_vs_metric.pdf
hash: md5
- md5: 41e95614d524a857c0260b13ce77202b
- size: 488
- - path: params.yaml
+ md5: 060ab65502a83ee367156e0414905962
+ size: 31387
+ - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf
hash: md5
- md5: 9a178db02b5ad8f990c7a557790a36c7
- size: 1381
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_dataset/gzip_knn
+ md5: 18653a51a07e2fc5598620c2cf268fc8
+ size: 31725
+ - path: sms_spam/plots/symmetric_vs_string_metric.pdf
hash: md5
- md5: 955370e62c64341f4410f3f46f6d84fd.dir
- size: 7263
- nfiles: 4
- - path: kdd_nsl/reports/test_each_dataset/gzip_knn/score_dict.json
+ md5: fbbd49babe5bee5e8b16ac52bb01ffaa
+ size: 23669
+ - path: sms_spam/plots/symmetric_vs_string_metric_train_time.pdf
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_dataset@gzip_knn-truthseeker:
- cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn
- data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker
- model_name=gzip_knn model=gzip_knn hydra.run.dir=truthseeker/logs/test_each_dataset/gzip_knn
- ++raise_exception=True '
+ md5: 9b25b9f84afa0f43c3276b7e8f1866d3
+ size: 24712
+ plot_condense@sms_spam:
+ cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/condensed_merged.csv -c
+ conf/condensed_plots.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: conf/condensed_plots.yaml
hash: md5
- md5: 41e95614d524a857c0260b13ce77202b
- size: 488
- - path: params.yaml
+ md5: af17fa58e7c01bcbb396ab08de5b78d5
+ size: 1915
+ - path: sms_spam/plots/condensed_merged.csv
hash: md5
- md5: 9a178db02b5ad8f990c7a557790a36c7
- size: 1381
+ md5: aff0ab5439e406220d4c0c95d7032f71
+ size: 4293513
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: truthseeker/logs/test_each_dataset/gzip_knn
- hash: md5
- md5: f8dd2e14f7e12daed6ebfd9a552d6c4e.dir
- size: 7305
- nfiles: 4
- - path: truthseeker/reports/test_each_dataset/gzip_knn/score_dict.json
- hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_dataset@ddos-gzip_knn:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model=gzip_knn
- hydra.run.dir=ddos/logs/train/gzip_knn ++raise_exception=True '
+ conf/condensed_plots.yaml:
+ cat_plot:
+ - file: condensing_method_vs_accuracy.pdf
+ digitize: Condensing Ratio
+ x: Condensing Method
+ hue: Condensing Ratio
+ y: accuracy
+ y_scale: linear
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xlabels: Condensing Method
+ ylabels: Accuracy
+ legend_title: Sample Ratio
+ - file: condensing_method_vs_train_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: train_time
+ y_scale: log
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Training Time
+ legend_title: Sample Ratio
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: condensing_method_vs_predict_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: predict_time
+ y_scale: log
+ col: Model
+ rotation: 45
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Prediction Time
+ legend_title: Sample Ratio
+ outs:
+ - path: sms_spam/plots/condensing_method_vs_accuracy.pdf
+ hash: md5
+ md5: 367e877eaa1c765d35ab91cb242684ea
+ size: 77057
+ - path: sms_spam/plots/condensing_method_vs_predict_time.pdf
+ hash: md5
+ md5: d2376488f2a0c040274c3d2036733e00
+ size: 79014
+ - path: sms_spam/plots/condensing_method_vs_train_time.pdf
+ hash: md5
+ md5: cc97909ea8a9d7df69647a6705d624b4
+ size: 78699
+ copy@sms_spam:
+ cmd: rm -rf ~/Gzip-KNN/figs/sms_spam/ && mkdir -p ~/Gzip-KNN/figs/sms_spam/ &&
+ cp -r sms_spam/plots/* ~/Gzip-KNN/figs/sms_spam/ && rm -rf ~/Gzip-KNN/figs/sms_spam/.gitignore
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: sms_spam/plots/
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: ee777ff721b32fb8529b6b3d4cf0241f.dir
+ size: 14711161
+ nfiles: 29
+ clean@kdd_nsl-condense/knn:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/knn.csv
+ -o kdd_nsl/plots/clean/condense/knn.csv -c conf/clean.yaml
+ deps:
+ - path: kdd_nsl/reports/condense/knn.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 36a67671da89d39ab7d0c45296693749
+ size: 2482710
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: ddos/logs/train/gzip_knn
+ - path: kdd_nsl/plots/clean/condense/knn.csv
hash: md5
- md5: 86973d6369f6a61b442f6387478ccde6.dir
- size: 8041
- nfiles: 4
- - path: ddos/reports/train/gzip_knn/score_dict.json
- hash: md5
- md5: 1269132e68fc8dff521df51cb2fe321c
- size: 284
- test_each_dataset@ddos-gzip_svc:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100
- files.directory=ddos data=ddos dataset=ddos model_name=gzip_svc model=gzip_svc
- hydra.run.dir=ddos/logs/train/gzip_svc ++raise_exception=True '
+ md5: 7faf7190b1f806dbc3eb6477cedc7ee5
+ size: 1507783
+ clean@kdd_nsl-condense/logistic:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/logistic.csv
+ -o kdd_nsl/plots/clean/condense/logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: kdd_nsl/reports/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 1325ef7a8bebf6d77e0793ce344e95cc
+ size: 2886969
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: ddos/logs/train/gzip_svc
+ - path: kdd_nsl/plots/clean/condense/logistic.csv
hash: md5
- md5: 67d472318cba51a8f9e7989991cbf09e.dir
- size: 8038
- nfiles: 4
- - path: ddos/reports/train/gzip_svc/score_dict.json
- hash: md5
- md5: 5728b15f67d338a4bf8160b60715dce8
- size: 283
- test_each_dataset@ddos-gzip_logistic:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic
- data.sample.train_size=100 files.directory=ddos data=ddos dataset=ddos model_name=gzip_logistic
- model=gzip_logistic hydra.run.dir=ddos/logs/train/gzip_logistic ++raise_exception=True '
+ md5: 8baf78c24cf0a48103fe3f5c3b7ea340
+ size: 2014871
+ clean@kdd_nsl-condense/svc:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/svc.csv
+ -o kdd_nsl/plots/clean/condense/svc.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: kdd_nsl/reports/condense/svc.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: d825a5d325742621f7cfaf2849ddf79f
+ size: 2731160
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: ddos/logs/train/gzip_logistic
+ - path: kdd_nsl/plots/clean/condense/svc.csv
hash: md5
- md5: 24fe0f4f52e6989c5a1c65795ea0d936.dir
- size: 8173
- nfiles: 4
- - path: ddos/reports/train/gzip_logistic/score_dict.json
- hash: md5
- md5: 259b4ae57c0c1e8d08b72f7f888fbe45
- size: 281
- test_each_dataset@truthseeker-gzip_knn:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn
- model=gzip_knn hydra.run.dir=truthseeker/logs/train/gzip_knn ++raise_exception=True '
+ md5: c0b256435cf12d7637b92514bf852c4c
+ size: 2007338
+ merge_condense@kdd_nsl:
+ cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/condense/knn.csv
+ --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
+ kdd_nsl/plots/ --output_file condensed_merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: kdd_nsl/plots/clean/condense/knn.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 7faf7190b1f806dbc3eb6477cedc7ee5
+ size: 1507783
+ - path: kdd_nsl/plots/clean/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ md5: 8baf78c24cf0a48103fe3f5c3b7ea340
+ size: 2014871
+ - path: kdd_nsl/plots/clean/condense/svc.csv
+ hash: md5
+ md5: c0b256435cf12d7637b92514bf852c4c
+ size: 2007338
outs:
- - path: truthseeker/logs/train/gzip_knn
+ - path: kdd_nsl/plots/condensed_merged.csv
hash: md5
- md5: ba3eb31317c073b3b07a9c9d1948e656.dir
- size: 8158
- nfiles: 4
- - path: truthseeker/reports/train/gzip_knn/score_dict.json
- hash: md5
- md5: 2088612d107192d0497e9fd2c569818f
- size: 283
- test_each_dataset@truthseeker-gzip_svc:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100
- files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_svc
- model=gzip_svc hydra.run.dir=truthseeker/logs/train/gzip_svc ++raise_exception=True '
+ md5: 3ce3f32f881b93574c5e475e5617847e
+ size: 5582885
+ clean@kdd_nsl-gzip_knn:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_knn.csv -o kdd_nsl/plots/clean/gzip_knn.csv
+ -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: kdd_nsl/reports/gzip_knn.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 2e569940af77f7280eaa067077d75b0b
+ size: 1286094
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: truthseeker/logs/train/gzip_svc
+ - path: kdd_nsl/plots/clean/gzip_knn.csv
hash: md5
- md5: 4512bda479ab6cd5ae74e7f575928b9d.dir
- size: 8154
- nfiles: 4
- - path: truthseeker/reports/train/gzip_svc/score_dict.json
- hash: md5
- md5: 25d8ec2a07497188e4311c5d62f9ddb6
- size: 281
- test_each_dataset@truthseeker-gzip_logistic:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic
- data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker
- model_name=gzip_logistic model=gzip_logistic hydra.run.dir=truthseeker/logs/train/gzip_logistic
- ++raise_exception=True '
+ md5: 24f521894702af73c82fd3b8b8ff27b1
+ size: 715749
+ clean@kdd_nsl-gzip_logistic:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_logistic.csv
+ -o kdd_nsl/plots/clean/gzip_logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: kdd_nsl/reports/gzip_logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: a5d9359b42a7d7b06cdc0d9438bfa836
+ size: 1406330
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: truthseeker/logs/train/gzip_logistic
+ - path: kdd_nsl/plots/clean/gzip_logistic.csv
hash: md5
- md5: e1da0260d3c55bfbf4a44bb1b96206ba.dir
- size: 8315
- nfiles: 4
- - path: truthseeker/reports/train/gzip_logistic/score_dict.json
- hash: md5
- md5: 9ba0565e8f7dcb14a1e45b8e585d9ccb
- size: 283
- test_each_dataset@sms_spam-gzip_knn:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn
- model=gzip_knn hydra.run.dir=sms_spam/logs/train/gzip_knn ++raise_exception=True '
+ md5: 2847de576a49e63aae2ae02937d39ce4
+ size: 1056239
+ clean@kdd_nsl-gzip_svc:
+ cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_svc.csv -o kdd_nsl/plots/clean/gzip_svc.csv
+ -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: kdd_nsl/reports/gzip_svc.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: db5b11d405596dfa38b7592ad89e4e4a
+ size: 1407185
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: sms_spam/logs/train/gzip_knn
+ - path: kdd_nsl/plots/clean/gzip_svc.csv
hash: md5
- md5: 2066e09b41a2f6ce0c835018278b0dc6.dir
- size: 8093
- nfiles: 4
- - path: sms_spam/reports/train/gzip_knn/score_dict.json
- hash: md5
- md5: 45ab656d14366622402a687082c5feeb
- size: 284
- test_each_dataset@sms_spam-gzip_svc:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100
- files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_svc
- model=gzip_svc hydra.run.dir=sms_spam/logs/train/gzip_svc ++raise_exception=True '
+ md5: 9438c5a8752b7c4224ba94b8ee98dee5
+ size: 1156562
+ merge@kdd_nsl:
+ cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file
+ clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder kdd_nsl/plots --output_file
+ merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: kdd_nsl/plots/clean/gzip_knn.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 24f521894702af73c82fd3b8b8ff27b1
+ size: 715749
+ - path: kdd_nsl/plots/clean/gzip_logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ md5: 2847de576a49e63aae2ae02937d39ce4
+ size: 1056239
+ - path: kdd_nsl/plots/clean/gzip_svc.csv
+ hash: md5
+ md5: 9438c5a8752b7c4224ba94b8ee98dee5
+ size: 1156562
outs:
- - path: sms_spam/logs/train/gzip_svc
+ - path: kdd_nsl/plots/merged.csv
hash: md5
- md5: 4f8d2f14bf8ed23f7443b91640fbb2c0.dir
- size: 8090
- nfiles: 4
- - path: sms_spam/reports/train/gzip_svc/score_dict.json
- hash: md5
- md5: 6cf7317e720631b93bcd699b22a9c4ec
- size: 283
- test_each_dataset@sms_spam-gzip_logistic:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic
- data.sample.train_size=100 files.directory=sms_spam data=sms_spam dataset=sms_spam
- model_name=gzip_logistic model=gzip_logistic hydra.run.dir=sms_spam/logs/train/gzip_logistic
- ++raise_exception=True '
+ md5: e9aaa44e6ef176c174b296c31a6760f9
+ size: 2956133
+ plot@kdd_nsl:
+ cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/merged.csv -c
+ conf/plots.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: conf/plots.yaml
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 43e3ec0876b55c83f231615f7a904e33
+ size: 7386
+ - path: kdd_nsl/plots/merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: e9aaa44e6ef176c174b296c31a6760f9
+ size: 2956133
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/plots.yaml:
+ cat_plot:
+ - file: symmetric_vs_compressor_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressor
+ ylabels: Accuracy
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_string_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Metrics
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: linear
+ - file: symmetric_vs_string_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: String Metrics
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_compressor_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - file: metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ - file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
outs:
- - path: sms_spam/logs/train/gzip_logistic
+ - path: kdd_nsl/plots/compressor_metric_vs_accuracy.pdf
hash: md5
- md5: e9577cb3ce87a9e0a55da46017111e2a.dir
- size: 8225
- nfiles: 4
- - path: sms_spam/reports/train/gzip_logistic/score_dict.json
- hash: md5
- md5: 8c39b120c89ed2d1c51c88d99f202ab1
- size: 281
- test_each_dataset@kdd_nsl-gzip_knn:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model=gzip_knn
- hydra.run.dir=kdd_nsl/logs/train/gzip_knn ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ md5: c489db933d8ba20b46f2c660a0a3047a
+ size: 21218
+ - path: kdd_nsl/plots/metric_vs_accuracy.pdf
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 7a142e5701cc21160fda0863069f047d
+ size: 24512
+ - path: kdd_nsl/plots/string_metric_vs_accuracy.pdf
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/train/gzip_knn
+ md5: 887d2ab7003eaf8f7802f4283dfc7fef
+ size: 20482
+ - path: kdd_nsl/plots/symmetric_vs_compressor_metric.pdf
hash: md5
- md5: d9f95ac89efb51e0b9474a50ed1ee34d.dir
- size: 8108
- nfiles: 4
- - path: kdd_nsl/reports/train/gzip_knn/score_dict.json
- hash: md5
- md5: 1bb23417615a5663b20ae3c9bb05ab41
- size: 284
- test_each_dataset@kdd_nsl-gzip_svc:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100
- files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_svc model=gzip_svc
- hydra.run.dir=kdd_nsl/logs/train/gzip_svc ++raise_exception=True '
- deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ md5: 3a7c06d30bdcbca9f6a07d638868fbba
+ size: 21400
+ - path: kdd_nsl/plots/symmetric_vs_metric.pdf
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: c6702ce379c3f136e12dc9ea9026388a
+ size: 31309
+ - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf
+ hash: md5
+ md5: 96cbbe31be92230fb5fa87cc8c4e439f
+ size: 32172
+ - path: kdd_nsl/plots/symmetric_vs_string_metric.pdf
+ hash: md5
+ md5: cc66d61cd5b6709b480d5040eca3dd6a
+ size: 22907
+ - path: kdd_nsl/plots/symmetric_vs_string_metric_train_time.pdf
+ hash: md5
+ md5: 2a87a16ab34be554a1c5cba1a00f5ff8
+ size: 25045
+ clean@ddos-gzip_knn:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_knn.csv -o ddos/plots/clean/gzip_knn.csv
+ -c conf/clean.yaml
+ deps:
+ - path: ddos/reports/gzip_knn.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 300b372df1c4be34b85f4080667329a1
+ size: 1537512
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/train/gzip_svc
+ - path: ddos/plots/clean/gzip_knn.csv
hash: md5
- md5: 8efe1af9a07fe35bf35a620aecc9984e.dir
- size: 8105
- nfiles: 4
- - path: kdd_nsl/reports/train/gzip_svc/score_dict.json
- hash: md5
- md5: 6e851ecef3c53745a566ce54bc9b64e3
- size: 283
- test_each_dataset@kdd_nsl-gzip_logistic:
- cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic
- data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl
- model_name=gzip_logistic model=gzip_logistic hydra.run.dir=kdd_nsl/logs/train/gzip_logistic
- ++raise_exception=True '
+ md5: 4dcfbd9357af1a17978265cd5cf7b389
+ size: 1231290
+ clean@ddos-gzip_logistic:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_logistic.csv -o
+ ddos/plots/clean/gzip_logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: ddos/reports/gzip_logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 476499955f6c0b8f796c2d8274ad108d
+ size: 1387052
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/train/gzip_logistic
- hash: md5
- md5: b3b1f1813a6bc3b51b1aca53b3730892.dir
- size: 8240
- nfiles: 4
- - path: kdd_nsl/reports/train/gzip_logistic/score_dict.json
+ - path: ddos/plots/clean/gzip_logistic.csv
hash: md5
- md5: ce2f45436d570475e2cd62b1d5417305
- size: 281
- test_each_metric@jaro-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=jaro model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20
- ++raise_exception=True '
+ md5: 10f4e37f4dc1bf7874461430c547a9c8
+ size: 929254
+ clean@ddos-gzip_svc:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_svc.csv -o ddos/plots/clean/gzip_svc.csv
+ -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: ddos/reports/gzip_svc.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: d85b5ddf9fab15d76641603c4d774a79
+ size: 1376765
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20
+ - path: ddos/plots/clean/gzip_svc.csv
hash: md5
- md5: 8b71ff09c44e615322095f861b3f1dca.dir
- size: 7662
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/jaro/20/score_dict.json
+ md5: 39e10d3afe8e5a6a008300166abf64b6
+ size: 1111620
+ merge@ddos:
+ cmd: python merge.py --big_dir ddos/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file
+ clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder ddos/plots --output_file
+ merged.csv
+ deps:
+ - path: ddos/plots/clean/gzip_knn.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@jaro_winkler-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro_winkler/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=jaro_winkler model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20 ++raise_exception=True '
+ md5: 4dcfbd9357af1a17978265cd5cf7b389
+ size: 1231290
+ - path: ddos/plots/clean/gzip_logistic.csv
+ hash: md5
+ md5: 10f4e37f4dc1bf7874461430c547a9c8
+ size: 929254
+ - path: ddos/plots/clean/gzip_svc.csv
+ hash: md5
+ md5: 39e10d3afe8e5a6a008300166abf64b6
+ size: 1111620
+ outs:
+ - path: ddos/plots/merged.csv
+ hash: md5
+ md5: ddd7e1f8412a6a8d397888033a755ad2
+ size: 3305983
+ clean@truthseeker-gzip_knn:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_knn.csv
+ -o truthseeker/plots/clean/gzip_knn.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: truthseeker/reports/gzip_knn.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 2298733dbbc1d3a699eeaedaee005a91
+ size: 1246208
+ params:
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+ outs:
+ - path: truthseeker/plots/clean/gzip_knn.csv
+ hash: md5
+ md5: 1f8dbb1f89957121ca5f935f2c6503bd
+ size: 691191
+ clean@truthseeker-gzip_logistic:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_logistic.csv
+ -o truthseeker/plots/clean/gzip_logistic.csv -c conf/clean.yaml
+ deps:
+ - path: truthseeker/reports/gzip_logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 6ed79959e5c663c55217dcf02ed58cc9
+ size: 1351631
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20
+ - path: truthseeker/plots/clean/gzip_logistic.csv
hash: md5
- md5: 2b831c44b315a8b61c3f762b365c8e5f.dir
- size: 7782
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/jaro_winkler/20/score_dict.json
+ md5: e06aa9e97e30f80c615606ecd610195c
+ size: 952678
+ clean@truthseeker-gzip_svc:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_svc.csv
+ -o truthseeker/plots/clean/gzip_svc.csv -c conf/clean.yaml
+ deps:
+ - path: truthseeker/reports/gzip_svc.csv
+ hash: md5
+ md5: e7567275d1f0e7952c116b6533d43c2d
+ size: 1366409
+ params:
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+ outs:
+ - path: truthseeker/plots/clean/gzip_svc.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_metric@seqratio-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/seqratio/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=seqratio model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20 ++raise_exception=True '
+ md5: 39120e9e457e55ab86298d192b7b8d51
+ size: 1112569
+ merge@truthseeker:
+ cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/gzip_knn.csv
+ --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder
+ truthseeker/plots --output_file merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: truthseeker/plots/clean/gzip_knn.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: 1f8dbb1f89957121ca5f935f2c6503bd
+ size: 691191
+ - path: truthseeker/plots/clean/gzip_logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20
+ md5: e06aa9e97e30f80c615606ecd610195c
+ size: 952678
+ - path: truthseeker/plots/clean/gzip_svc.csv
hash: md5
- md5: ed632f40ed8ff016cb649ab00c408114.dir
- size: 7722
- nfiles: 4
- - path: kdd_nsl/reports/test_each_metric/gzip_knn/seqratio/20/score_dict.json
+ md5: 39120e9e457e55ab86298d192b7b8d51
+ size: 1112569
+ outs:
+ - path: truthseeker/plots/merged.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_model@gzip-gzip_knn-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_knn/gzip/20
- ++raise_exception=True '
+ md5: a6294ee4d1fc5b445dbf585745dfb18e
+ size: 2783534
+ merge_datasets:
+ cmd: python merge.py --big_dir . --little_dir . --data_file sms_spam/plots/merged.csv
+ --little_dir_data_file kdd_nsl/plots/merged.csv ddos/plots/merged.csv truthseeker/plots/merged.csv
+ kdd_nsl/plots/condensed_merged.csv ddos/plots/condensed_merged.csv truthseeker/plots/condensed_merged.csv
+ sms_spam/plots/condensed_merged.csv --output_folder combined/plots/ --output_file
+ merged.csv
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: ddos/plots/merged.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: ddd7e1f8412a6a8d397888033a755ad2
+ size: 3305983
+ - path: kdd_nsl/plots/merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
- outs:
- - path: kdd_nsl/logs/test_each_model/gzip_knn/gzip/20
+ md5: e9aaa44e6ef176c174b296c31a6760f9
+ size: 2956133
+ - path: sms_spam/plots/merged.csv
hash: md5
- md5: c8075fa1867cb00a11f6df654086bd97.dir
- size: 7675
- nfiles: 4
- - path: kdd_nsl/reports/test_each_model/gzip_knn/gzip/20/score_dict.json
+ md5: 4baf51fdcc220aedc6443147a057559e
+ size: 2765074
+ - path: truthseeker/plots/merged.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_model@gzip-gzip_svc-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_svc/gzip/20
- ++raise_exception=True '
+ md5: a6294ee4d1fc5b445dbf585745dfb18e
+ size: 2783534
+ outs:
+ - path: combined/plots/merged.csv
+ hash: md5
+ md5: a7ca9f759ab63a1649889ad57e928578
+ size: 33289497
+ clean@ddos-condense/svc:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/svc.csv -o
+ ddos/plots/clean/condense/svc.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
+ - path: ddos/reports/condense/svc.csv
hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ md5: f7fa9ef13258b1cc8e4dee82f395cabc
+ size: 2853089
+ params:
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+ outs:
+ - path: ddos/plots/clean/condense/svc.csv
+ hash: md5
+ md5: a016c3958a5bedbce540628908c94082
+ size: 2336402
+ clean@truthseeker-condense/svc:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/svc.csv
+ -o truthseeker/plots/clean/condense/svc.csv -c conf/clean.yaml
+ deps:
+ - path: truthseeker/reports/condense/svc.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 789d469a26448549761aa6140fd4bc7d
+ size: 2260420
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/test_each_model/gzip_svc/gzip/20
- hash: md5
- md5: 6ec9663f42d781dc482f1da6df886312.dir
- size: 7678
- nfiles: 4
- - path: kdd_nsl/reports/test_each_model/gzip_svc/gzip/20/score_dict.json
+ - path: truthseeker/plots/clean/condense/svc.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- test_each_model@gzip-gzip_logistic-kdd_nsl-20:
- cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20
- files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
- model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1
- hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True '
+ md5: 5217ab37267115a9f3a887dda0ca9716
+ size: 1837203
+ clean@truthseeker-condense/logistic:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/logistic.csv
+ -o truthseeker/plots/clean/condense/logistic.csv -c conf/clean.yaml
deps:
- - path: kdd_nsl/reports/train/default/score_dict.json
- hash: md5
- md5: 81a03f1290fe4d5eaa739ba9807b5b20
- size: 488
- - path: params.yaml
+ - path: truthseeker/reports/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: d7854b284f4668d9b5706002ede597cd
+ size: 1461329
params:
- params.yaml:
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- dataset: kdd_nsl
- device_id: cpu
- files:
- _target_: deckard.base.files.FileConfig
- data_dir: data
- data_type: .csv
- directory: kdd_nsl
- model_dir: model
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
- model:
- _target_: deckard.base.model.Model
- data:
- _target_: deckard.base.data.Data
- name: raw_data/kdd_nsl_undersampled_5000.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
- k: 1
- m: -1
- metric: gzip
- name: gzip_classifier.GzipKNN
- symmetric: false
- library: sklearn
- model_name: gzip_knn
- scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20
+ - path: truthseeker/plots/clean/condense/logistic.csv
hash: md5
- md5: 8ba9f7659cef2c4d610fece176de1548.dir
- size: 7767
- nfiles: 4
- - path: kdd_nsl/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json
+ md5: 2834667122a045b2815d6d8669d13855
+ size: 1195763
+ clean@truthseeker-condense/knn:
+ cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/knn.csv
+ -o truthseeker/plots/clean/condense/knn.csv -c conf/clean.yaml
+ deps:
+ - path: truthseeker/reports/condense/knn.csv
hash: md5
- md5: 5d8bf090bc8e34df8ed01766adfca5eb
- size: 26
- grid_search@20-kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 09ff6b9152372998f2cc0cf9e5b10a52
+ size: 2364296
+ params:
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+ outs:
+ - path: truthseeker/plots/clean/condense/knn.csv
+ hash: md5
+ md5: bb4310ab3db56fef5287c968e923a946
+ size: 1416979
+ plot@truthseeker:
+ cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/merged.csv -c
+ conf/plots.yaml
deps:
- - path: conf/gzip_knn.yaml
+ - path: conf/plots.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
- - path: params.yaml
+ md5: 43e3ec0876b55c83f231615f7a904e33
+ size: 7386
+ - path: truthseeker/plots/merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: a6294ee4d1fc5b445dbf585745dfb18e
+ size: 2783534
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_knn
+ conf/plots.yaml:
+ cat_plot:
+ - file: symmetric_vs_compressor_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressor
+ ylabels: Accuracy
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_string_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Metrics
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: linear
+ - file: symmetric_vs_string_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: String Metrics
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_compressor_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - file: metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ - file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
outs:
- - path: kdd_nsl/logs/gzip_knn/20
+ - path: truthseeker/plots/compressor_metric_vs_accuracy.pdf
hash: md5
- md5: 5c03e3e52e7a24e15acbd0b2aadfee35.dir
- size: 1389089
- nfiles: 514
- - path: kdd_nsl/reports/gzip_knn/20/train/
+ md5: fe9b34fc5c7bdb52f8092be432715ad6
+ size: 19529
+ - path: truthseeker/plots/metric_vs_accuracy.pdf
hash: md5
- md5: a7e0e97547bfac97d8518259bffdd4c1.dir
- size: 1847622
- nfiles: 1661
- grid_search@20-kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 2a49ccd20406d6d58692f241855c3d08
+ size: 22804
+ - path: truthseeker/plots/string_metric_vs_accuracy.pdf
+ hash: md5
+ md5: 9ae3cf88045c9556d26df2d79d493e35
+ size: 20944
+ - path: truthseeker/plots/symmetric_vs_compressor_metric.pdf
+ hash: md5
+ md5: e1765300eb083de79d90786e3ca35374
+ size: 21369
+ - path: truthseeker/plots/symmetric_vs_metric.pdf
+ hash: md5
+ md5: 00178f8d5d5644099848f066d44d5316
+ size: 31272
+ - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf
+ hash: md5
+ md5: ae31c23accfaa8696452aceae673db53
+ size: 32498
+ - path: truthseeker/plots/symmetric_vs_string_metric.pdf
+ hash: md5
+ md5: 79fd5831809a53057c775ef1c52e089a
+ size: 23079
+ - path: truthseeker/plots/symmetric_vs_string_metric_train_time.pdf
+ hash: md5
+ md5: 35f27e898700bb9b4e941a1c6fc06273
+ size: 24558
+ plot@ddos:
+ cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/merged.csv -c
+ conf/plots.yaml
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/plots.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
- - path: params.yaml
+ md5: 43e3ec0876b55c83f231615f7a904e33
+ size: 7386
+ - path: ddos/plots/merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: ddd7e1f8412a6a8d397888033a755ad2
+ size: 3305983
params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
+ conf/plots.yaml:
+ cat_plot:
+ - file: symmetric_vs_compressor_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressor
+ ylabels: Accuracy
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_string_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric.pdf
+ x: Metric
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles: ' '
+ xlabels: Compressors
+ ylabels: Accuracy
+ legend_title: ' '
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Metrics
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: linear
+ - file: symmetric_vs_string_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: String Metrics
+ order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: symmetric_vs_compressor_metric_train_time.pdf
+ x: Metric
+ y: train_time
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Compressors
+ ylabels: Training Time (s)
+ legend_title: Metrics
+ order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ hue_order:
+ - Asymmetric
+ - Symmetric
+ rotation: 90
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: Training Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - file: metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ - file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time
+ ylabel: Prediction Time (s)
+ y_scale: linear
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ legend:
+ title: Metrics
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
outs:
- - path: kdd_nsl/logs/gzip_logistic/20
- hash: md5
- md5: e7528ce71bad9f745a9f5e4fcf3a2df1.dir
- size: 1571121
- nfiles: 514
- - path: kdd_nsl/reports/gzip_logistic/20/train/
- hash: md5
- md5: 127796b95b1817c4b0d9f1846537b0a6.dir
- size: 2083086
- nfiles: 1772
- grid_search@20-kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
- deps:
- - path: conf/gzip_svc.yaml
- hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
- - path: params.yaml
+ - path: ddos/plots/compressor_metric_vs_accuracy.pdf
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
- outs:
- - path: kdd_nsl/logs/gzip_svc/20
+ md5: 4e9ec7bc40de0eb9686c80001471c633
+ size: 21223
+ - path: ddos/plots/metric_vs_accuracy.pdf
hash: md5
- md5: a1cb35a26808d09dac04aef8fc7106cb.dir
- size: 1524012
- nfiles: 514
- - path: kdd_nsl/reports/gzip_svc/20/train/
+ md5: 55f65e038473f751761c89450273e99f
+ size: 24492
+ - path: ddos/plots/string_metric_vs_accuracy.pdf
hash: md5
- md5: f475c4428240afaaf863bb021eb82890.dir
- size: 2095726
- nfiles: 2092
- grid_search@20-truthseeker-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv
- files.directory=truthseeker files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
- deps:
- - path: conf/gzip_knn.yaml
+ md5: 080a9ad5352a1c8a4ea0742d8fa2064d
+ size: 21341
+ - path: ddos/plots/symmetric_vs_compressor_metric.pdf
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
- - path: params.yaml
+ md5: 7868ca14c1c3b8cff7377e570b3cd1fd
+ size: 21164
+ - path: ddos/plots/symmetric_vs_metric.pdf
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_knn
- outs:
- - path: truthseeker/logs/gzip_knn/20
+ md5: 3a1fdd75ec075371e20a43f6fceb5865
+ size: 31323
+ - path: ddos/plots/symmetric_vs_metric_train_time.pdf
hash: md5
- md5: 21da241789a9856418302895c146cd4d.dir
- size: 1370161
- nfiles: 514
- - path: truthseeker/reports/gzip_knn/20/train/
+ md5: 2d477f3dae3b1985f0f06b4b50e47b6d
+ size: 32595
+ - path: ddos/plots/symmetric_vs_string_metric.pdf
hash: md5
- md5: 394a7d8c033166c958996d646f822460.dir
- size: 376291
- nfiles: 340
- grid_search@20-truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv
- files.directory=truthseeker files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: c1d20c58447ed0ce378586a0a741cd2f
+ size: 23231
+ - path: ddos/plots/symmetric_vs_string_metric_train_time.pdf
+ hash: md5
+ md5: 96008fa9732748ceca2292daa7b10d5c
+ size: 25192
+ merge_condense@truthseeker:
+ cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/condense/knn.csv
+ --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
+ truthseeker/plots/ --output_file condensed_merged.csv
deps:
- - path: conf/gzip_logistic.yaml
+ - path: truthseeker/plots/clean/condense/knn.csv
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
- - path: params.yaml
+ md5: bb4310ab3db56fef5287c968e923a946
+ size: 1416979
+ - path: truthseeker/plots/clean/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
+ md5: 2834667122a045b2815d6d8669d13855
+ size: 1195763
+ - path: truthseeker/plots/clean/condense/svc.csv
+ hash: md5
+ md5: 5217ab37267115a9f3a887dda0ca9716
+ size: 1837203
outs:
- - path: truthseeker/logs/gzip_logistic/20
+ - path: truthseeker/plots/condensed_merged.csv
hash: md5
- md5: 4eceda9fdfa787e48b4a2d397ad89332.dir
- size: 1497002
- nfiles: 514
- - path: truthseeker/reports/gzip_logistic/20/train/
+ md5: fc78969e3c4df404d5954d906de1e2fe
+ size: 4494580
+ plot_condense@truthseeker:
+ cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/condensed_merged.csv -c
+ conf/condensed_plots.yaml
+ deps:
+ - path: conf/condensed_plots.yaml
hash: md5
- md5: 9b32f4ef152eda3a3f2e68d424d163d2.dir
- size: 555897
- nfiles: 366
- grid_search@20-truthseeker-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv
- files.directory=truthseeker files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: af17fa58e7c01bcbb396ab08de5b78d5
+ size: 1915
+ - path: truthseeker/plots/condensed_merged.csv
+ hash: md5
+ md5: fc78969e3c4df404d5954d906de1e2fe
+ size: 4494580
+ params:
+ conf/condensed_plots.yaml:
+ cat_plot:
+ - file: condensing_method_vs_accuracy.pdf
+ digitize: Condensing Ratio
+ x: Condensing Method
+ hue: Condensing Ratio
+ y: accuracy
+ y_scale: linear
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xlabels: Condensing Method
+ ylabels: Accuracy
+ legend_title: Sample Ratio
+ - file: condensing_method_vs_train_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: train_time
+ y_scale: log
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Training Time
+ legend_title: Sample Ratio
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: condensing_method_vs_predict_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: predict_time
+ y_scale: log
+ col: Model
+ rotation: 45
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Prediction Time
+ legend_title: Sample Ratio
+ outs:
+ - path: truthseeker/plots/condensing_method_vs_accuracy.pdf
+ hash: md5
+ md5: 43daa962adc5b178b1ecd1ce631f7a82
+ size: 79151
+ - path: truthseeker/plots/condensing_method_vs_predict_time.pdf
+ hash: md5
+ md5: 8052368bafdaa94f3135e094f68bd55c
+ size: 76155
+ - path: truthseeker/plots/condensing_method_vs_train_time.pdf
+ hash: md5
+ md5: 5a88008752dd280bc73cee793026b594
+ size: 75513
+ copy@truthseeker:
+ cmd: rm -rf ~/Gzip-KNN/figs/truthseeker/ && mkdir -p ~/Gzip-KNN/figs/truthseeker/
+ && cp -r truthseeker/plots/* ~/Gzip-KNN/figs/truthseeker/ && rm -rf ~/Gzip-KNN/figs/truthseeker/.gitignore
deps:
- - path: conf/gzip_svc.yaml
+ - path: truthseeker/plots/
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
- - path: params.yaml
+ md5: fed82eba40c5f980d2ecc49dcd0bd732.dir
+ size: 15135833
+ nfiles: 29
+ clean@ddos-condense/knn:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/knn.csv -o
+ ddos/plots/clean/condense/knn.csv -c conf/clean.yaml
+ deps:
+ - path: ddos/reports/condense/knn.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 1bd44b90db430d5d5785537fe732b2a6
+ size: 2816581
params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: truthseeker/logs/gzip_svc/20
- hash: md5
- md5: 20a01b45b6f1901a8e929bf1cbccd349.dir
- size: 1473672
- nfiles: 514
- - path: truthseeker/reports/gzip_svc/20/train/
+ - path: ddos/plots/clean/condense/knn.csv
hash: md5
- md5: a2b059debfa307134c83ec03713e8a50.dir
- size: 546743
- nfiles: 384
- grid_search@20-sms_spam-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv
- files.directory=sms_spam files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 3a1acbf38f64695356c6b052547800f7
+ size: 2246228
+ clean@ddos-condense/logistic:
+ cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/logistic.csv
+ -o ddos/plots/clean/condense/logistic.csv -c conf/clean.yaml
deps:
- - path: conf/gzip_knn.yaml
- hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
- - path: params.yaml
+ - path: ddos/reports/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 83a34019f32c069c16172b171a602a26
+ size: 2848813
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_knn
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
outs:
- - path: sms_spam/logs/gzip_knn/20
- hash: md5
- md5: bcee56ea959096e8255fb482a8854457.dir
- size: 1381168
- nfiles: 514
- - path: sms_spam/reports/gzip_knn/20/train/
+ - path: ddos/plots/clean/condense/logistic.csv
hash: md5
- md5: 12133daeda911e75210cff4d8a3fa5a7.dir
- size: 379524
- nfiles: 326
- grid_search@20-sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv
- files.directory=sms_spam files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 37106f4477460267406dd80d90987cac
+ size: 2287660
+ merge_condense@ddos:
+ cmd: python merge.py --big_dir ddos/plots/ --data_file clean/condense/knn.csv
+ --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
+ ddos/plots/ --output_file condensed_merged.csv
deps:
- - path: conf/gzip_logistic.yaml
+ - path: ddos/plots/clean/condense/knn.csv
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
- - path: params.yaml
+ md5: 3a1acbf38f64695356c6b052547800f7
+ size: 2246228
+ - path: ddos/plots/clean/condense/logistic.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
- outs:
- - path: sms_spam/logs/gzip_logistic/20
+ md5: 37106f4477460267406dd80d90987cac
+ size: 2287660
+ - path: ddos/plots/clean/condense/svc.csv
hash: md5
- md5: 5c7265a3ac4bf4774fbb1c440b9910c4.dir
- size: 1520121
- nfiles: 514
- - path: sms_spam/reports/gzip_logistic/20/train/
+ md5: a016c3958a5bedbce540628908c94082
+ size: 2336402
+ outs:
+ - path: ddos/plots/condensed_merged.csv
hash: md5
- md5: 9ae8109f623b19dcbabe51e4401a1f8c.dir
- size: 552539
- nfiles: 357
- grid_search@20-sms_spam-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv
- files.directory=sms_spam files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: a509ca15f5da44a1c7fd5fa86541824a
+ size: 6939926
+ plot_condense@ddos:
+ cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/condensed_merged.csv -c
+ conf/condensed_plots.yaml
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/condensed_plots.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
- - path: params.yaml
+ md5: af17fa58e7c01bcbb396ab08de5b78d5
+ size: 1915
+ - path: ddos/plots/condensed_merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: a509ca15f5da44a1c7fd5fa86541824a
+ size: 6939926
params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
- outs:
- - path: sms_spam/logs/gzip_svc/20
- hash: md5
- md5: fe6324545be6dc97b88326e10a65e815.dir
- size: 1451676
- nfiles: 514
- - path: sms_spam/reports/gzip_svc/20/train/
- hash: md5
- md5: 814632194dc03d626a24f0418fd703e1.dir
- size: 542357
- nfiles: 384
- grid_search@20-ddos-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
- data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv
- files.directory=ddos files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ conf/condensed_plots.yaml:
+ cat_plot:
+ - file: condensing_method_vs_accuracy.pdf
+ digitize: Condensing Ratio
+ x: Condensing Method
+ hue: Condensing Ratio
+ y: accuracy
+ y_scale: linear
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xlabels: Condensing Method
+ ylabels: Accuracy
+ legend_title: Sample Ratio
+ - file: condensing_method_vs_train_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: train_time
+ y_scale: log
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Training Time
+ legend_title: Sample Ratio
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: condensing_method_vs_predict_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: predict_time
+ y_scale: log
+ col: Model
+ rotation: 45
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Prediction Time
+ legend_title: Sample Ratio
+ outs:
+ - path: ddos/plots/condensing_method_vs_accuracy.pdf
+ hash: md5
+ md5: 799f438072661472c3581b7783187e27
+ size: 95036
+ - path: ddos/plots/condensing_method_vs_predict_time.pdf
+ hash: md5
+ md5: e9d99a4d20977d908bc6125b4d3ec64c
+ size: 92611
+ - path: ddos/plots/condensing_method_vs_train_time.pdf
+ hash: md5
+ md5: 38d50e2531e75b0ed7e25f99fe3a020a
+ size: 92297
+ plot_condense@kdd_nsl:
+ cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/condensed_merged.csv -c
+ conf/condensed_plots.yaml
deps:
- - path: conf/gzip_knn.yaml
+ - path: conf/condensed_plots.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
- - path: params.yaml
+ md5: af17fa58e7c01bcbb396ab08de5b78d5
+ size: 1915
+ - path: kdd_nsl/plots/condensed_merged.csv
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 3ce3f32f881b93574c5e475e5617847e
+ size: 5582885
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_knn
+ conf/condensed_plots.yaml:
+ cat_plot:
+ - file: condensing_method_vs_accuracy.pdf
+ digitize: Condensing Ratio
+ x: Condensing Method
+ hue: Condensing Ratio
+ y: accuracy
+ y_scale: linear
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xlabels: Condensing Method
+ ylabels: Accuracy
+ legend_title: Sample Ratio
+ - file: condensing_method_vs_train_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: train_time
+ y_scale: log
+ kind: boxen
+ col: Model
+ rotation: 45
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Training Time
+ legend_title: Sample Ratio
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ - file: condensing_method_vs_predict_time.pdf
+ x: Condensing Method
+ hue: Condensing Ratio
+ digitize: Condensing Ratio
+ y: predict_time
+ y_scale: log
+ col: Model
+ rotation: 45
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ kind: boxen
+ order:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - KNN
+ xticklabels:
+ - Random
+ - Medoid
+ - Sum
+ - SVC
+ - Hardness
+ - NearMiss
+ - k-NN
+ xlabels: Condensing Method
+ ylabels: Prediction Time
+ legend_title: Sample Ratio
+ outs:
+ - path: kdd_nsl/plots/condensing_method_vs_accuracy.pdf
+ hash: md5
+ md5: 02804fa85242e8873e257703d36292b3
+ size: 93543
+ - path: kdd_nsl/plots/condensing_method_vs_predict_time.pdf
+ hash: md5
+ md5: a19ac9d498ba7a48818804efd89cc7ac
+ size: 89049
+ - path: kdd_nsl/plots/condensing_method_vs_train_time.pdf
+ hash: md5
+ md5: 0b856f827819de35d07371b6801edf04
+ size: 88882
+ plot_merged:
+ cmd: python -m deckard.layers.plots --path combined/plots/ --file combined/plots/merged.csv -c
+ conf/merged_plots.yaml
+ deps:
+ - path: combined/plots/merged.csv
+ hash: md5
+ md5: a7ca9f759ab63a1649889ad57e928578
+ size: 33289497
+ - path: conf/merged_plots.yaml
+ hash: md5
+ md5: 07cbd496003579ae0a5dc56bf03dc1a5
+ size: 8296
+ params:
+ conf/merged_plots.yaml:
+ cat_plot:
+ - file: models_vs_accuracy.pdf
+ x: Model
+ y: accuracy
+ hue: data.sample.train_size
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: Accuracy
+ legend_title: Samples
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ col: Dataset
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: models_vs_train_time.pdf
+ x: Model
+ y: train_time
+ hue: data.sample.train_size
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_t$ (s)
+ legend_title: Samples
+ rotation: 90
+ col: Dataset
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: log
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: models_vs_predict_time.pdf
+ x: Model
+ y: predict_time_per_sample
+ hue: data.sample.train_size
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_i$ (s)
+ legend_title: Samples
+ col: Dataset
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ y_scale: log
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ - file: symmetric_models_vs_accuracy.pdf
+ row: Model
+ x: data.sample.train_size
+ y: accuracy
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: Samples
+ ylabels: Accuracy
+ legend_title: ' '
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ col: Dataset
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: symmetric_models_vs_train_time.pdf
+ row: Model
+ x: data.sample.train_size
+ y: train_time_per_sample
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_t$ (s)
+ legend_title: ' '
+ rotation: 90
+ col: Dataset
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ y_scale: log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: symmetric_models_vs_predict_time.pdf
+ x: data.sample.train_size
+ row: Model
+ y: predict_time_per_sample
+ hue: Symmetric
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_i$ (s)
+ legend_title: ' '
+ col: Dataset
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ y_scale: log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ row_order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: condensing_methods_vs_accuracy.pdf
+ x: Model
+ y: accuracy
+ hue: Condensing Method
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: Accuracy
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ col: Dataset
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ legend_title: Condensing Method
+ - file: condensing_methods_vs_train_time.pdf
+ x: Model
+ y: train_time
+ hue: Condensing Method
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_t$ (s)
+ legend_title: Condensing Method
+ rotation: 90
+ col: Dataset
+ y_scale: log
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ - file: condensing_methods_vs_predict_time.pdf
+ x: Model
+ y: predict_time_per_sample
+ hue: Condensing Method
+ errorbar: se
+ kind: boxen
+ titles:
+ xlabels: ' '
+ ylabels: $t_i$ (s)
+ legend_title: Condensing Method
+ col: Dataset
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 14
+ rotation: 90
+ y_scale: log
+ col_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ order:
+ - k-KNN
+ - k-SVC
+ - k-Logistic
+ line_plot:
+ - file: compressor_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ - file: string_metric_vs_accuracy.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: accuracy
+ ylabel: Accuracy
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ - file: string_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: $t_t$ (s)
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ y_scale: log
+ - file: compressor_metric_vs_train_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: train_time
+ ylabel: $t_t$ (s)
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ y_scale: log
+ - file: string_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time_per_sample
+ ylabel: $t_i$ (s)
+ hue_order:
+ - Levenshtein
+ - Ratio
+ - Hamming
+ - Jaro
+ - Jaro-Winkler
+ - SeqRatio
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ y_scale: log
+ - file: compressor_metric_vs_predict_time.pdf
+ hue: Metric
+ title:
+ x: data.sample.train_size
+ xlabel: Number of Training Samples
+ y: predict_time_per_sample
+ ylabel: $t_i$ (s)
+ hue_order:
+ - GZIP
+ - Pickle
+ - BZ2
+ - ZSTD
+ - LZMA
+ errorbar: se
+ err_style: bars
+ xlim:
+ - 10
+ - 500
+ style: Dataset
+ style_order:
+ - DDoS
+ - SMS Spam
+ - KDD NSL
+ - Truthseeker
+ legend:
+ bbox_to_anchor:
+ - 1.05
+ - 0.5
+ loc: center left
+ prop:
+ size: 12
+ y_scale: log
outs:
- - path: ddos/logs/gzip_knn/20
+ - path: combined/plots/compressor_metric_vs_accuracy.pdf
hash: md5
- md5: 057fc9613b2210a0dd1e03ef46f3d6bc.dir
- size: 1616211
- nfiles: 514
- - path: ddos/reports/gzip_knn/20/train/
+ md5: 48aea5d713cb4eac12301c89d815af62
+ size: 23029
+ - path: combined/plots/compressor_metric_vs_predict_time.pdf
hash: md5
- md5: b0ae22713c6a319a24acb69525a9f01a.dir
- size: 1375974
- nfiles: 1536
- grid_search@20-ddos-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
- data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_logistic/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv
- files.directory=ddos files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 7d690d0d9381702841645a8cc47c4228
+ size: 23691
+ - path: combined/plots/compressor_metric_vs_train_time.pdf
+ hash: md5
+ md5: 7684f9f2d3fd807f5ca0791947a4f495
+ size: 23217
+ - path: combined/plots/condensing_methods_vs_accuracy.pdf
+ hash: md5
+ md5: ee93a76c66f25ab3f33d04e66dbc6c89
+ size: 61419
+ - path: combined/plots/condensing_methods_vs_predict_time.pdf
+ hash: md5
+ md5: c4d4d6309ccb922f0896c0682ebc62bb
+ size: 75130
+ - path: combined/plots/condensing_methods_vs_train_time.pdf
+ hash: md5
+ md5: 5630caa9d7cd712e9eade1e3f1f989ce
+ size: 74744
+ - path: combined/plots/models_vs_accuracy.pdf
+ hash: md5
+ md5: 89fbf635c37ad049a9d7581c819232fb
+ size: 44138
+ - path: combined/plots/models_vs_predict_time.pdf
+ hash: md5
+ md5: 7426493cc2eea4a3c795774dca34c3d7
+ size: 52991
+ - path: combined/plots/models_vs_train_time.pdf
+ hash: md5
+ md5: 8e94cfaf2d29f7900c5a79b728d22a3d
+ size: 52701
+ - path: combined/plots/string_metric_vs_accuracy.pdf
+ hash: md5
+ md5: 5da7b7e5fd2f428af3936550d29149ea
+ size: 24176
+ - path: combined/plots/string_metric_vs_predict_time.pdf
+ hash: md5
+ md5: ca75801d85720c0bab65447ab9310868
+ size: 24398
+ - path: combined/plots/string_metric_vs_train_time.pdf
+ hash: md5
+ md5: 9053fd4d1b86e8a6453c7862b2b7483a
+ size: 24458
+ - path: combined/plots/symmetric_models_vs_accuracy.pdf
+ hash: md5
+ md5: 14906a8e21db525a46910f6cc9776b37
+ size: 64101
+ - path: combined/plots/symmetric_models_vs_predict_time.pdf
+ hash: md5
+ md5: 20bbaa2bd5fb395b8d579246d0364937
+ size: 80822
+ - path: combined/plots/symmetric_models_vs_train_time.pdf
+ hash: md5
+ md5: b38a529d8bfd5dd25d8ffb4b57859225
+ size: 81185
+ copy@combined:
+ cmd: rm -rf ~/Gzip-KNN/figs/combined/ && mkdir -p ~/Gzip-KNN/figs/combined/ &&
+ cp -r combined/plots/* ~/Gzip-KNN/figs/combined/ && rm -rf ~/Gzip-KNN/figs/combined/.gitignore
deps:
- - path: conf/gzip_logistic.yaml
+ - path: combined/plots/
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
- - path: params.yaml
+ md5: fad9d0d19a575c84c55daa1cbd67b514.dir
+ size: 34019697
+ nfiles: 16
+ copy@ddos:
+ cmd: rm -rf ~/Gzip-KNN/figs/ddos/ && mkdir -p ~/Gzip-KNN/figs/ddos/ && cp -r ddos/plots/*
+ ~/Gzip-KNN/figs/ddos/ && rm -rf ~/Gzip-KNN/figs/ddos/.gitignore
+ deps:
+ - path: ddos/plots/
+ hash: md5
+ md5: 377bb3bca5774b42a32ad343d074462d.dir
+ size: 21089165
+ nfiles: 29
+ copy@kdd_nsl:
+ cmd: rm -rf ~/Gzip-KNN/figs/kdd_nsl/ && mkdir -p ~/Gzip-KNN/figs/kdd_nsl/ && cp
+ -r kdd_nsl/plots/* ~/Gzip-KNN/figs/kdd_nsl/ && rm -rf ~/Gzip-KNN/figs/kdd_nsl/.gitignore
+ deps:
+ - path: kdd_nsl/plots/
+ hash: md5
+ md5: dc76f478efb0cbc46246b1ee240687fe.dir
+ size: 17691329
+ nfiles: 29
+ clean_merged:
+ cmd: python -m deckard.layers.clean_data -i combined/plots/merged.csv -o combined/plots/clean_merged.csv
+ -c conf/clean.yaml
+ deps:
+ - path: combined/plots/merged.csv
+ hash: md5
+ md5: 14b7b6d947a96066ff2ad028680511d5
+ size: 33462041
+ - path: conf/clean.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 3fdcad8f5751398ace2b94aaa74e4e18
+ size: 1023
params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
- outs:
- - path: ddos/logs/gzip_logistic/20
+ conf/clean.yaml:
+ drop_values:
+ accuracy: 0.0
+ predict_time: 1.0
+ replace:
+ model.init.metric:
+ jaro: Jaro
+ _winkler: -Winkler
+ levenshtein: Levenshtein
+ ncd: NCD
+ ratio: Ratio
+ seqRatio: SeqRatio
+ hamming: Hamming
+ gzip: GZIP
+ pkl: Pickle
+ bz2: BZ2
+ zstd: ZSTD
+ lzma: LZMA
+ model_name:
+ GzipSVC: k-SVC
+ GzipLogisticRegressor: k-Logistic
+ GzipKNN: k-KNN
+ model.init.symmetric:
+ true: Symmetric
+ false: Asymmetric
+ model.init.sampling_method:
+ random: Random
+ medoid: Medoid
+ sum: Sum
+ svc: SVC
+ hardness: Hardness
+ nearmiss: NearMiss
+ knn: KNN
+ dataset:
+ ddos: DDoS
+ sms_spam: SMS Spam
+ kdd_nsl: KDD NSL
+ truthseeker: Truthseeker
+ model.init.m:
+ -1: 1
+ replace_cols:
+ dataset: Dataset
+ model.init.metric: Metric
+ model.init.symmetric: Symmetric
+ model.init.sampling_method: Condensing Method
+ model.init.m: Condensing Ratio
+ model_name: Model
+ outs:
+ - path: combined/plots/clean_merged.csv
+ hash: md5
+ md5: c156f464018e66193d396f270be55786
+ size: 33579589
+ data:
+ cmd: python data_prep.py
+ deps:
+ - path: data_prep.py
hash: md5
- md5: f2c036dc149976bc0de5187f8661669d.dir
- size: 1705246
- nfiles: 514
- - path: ddos/reports/gzip_logistic/20/train/
+ md5: 18244c921ed2d7cbf25b8362b3ca33aa
+ size: 5146
+ outs:
+ - path: raw_data/
hash: md5
- md5: 36eee9b3fb432eafed577ca45b477dab.dir
- size: 1608552
- nfiles: 1349
- grid_search@20-ddos-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
- data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv
- files.directory=ddos files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 33d46673e0631bef98be9e8991ed1ed1.dir
+ size: 50328647
+ nfiles: 8
+ parse_params:
+ cmd: python -m deckard.layers.parse
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/data/default.yaml
+ hash: md5
+ md5: 86639d6672cfd9529dda3e2ae4036c01
+ size: 22
+ - path: conf/default.yaml
+ hash: md5
+ md5: a0a533f84a7ffce197e0db5439219faf
+ size: 1504
+ - path: conf/files/default.yaml
+ hash: md5
+ md5: 7a2df5f8b98699376c3fb4da05d70dea
+ size: 306
+ - path: conf/model/default.yaml
+ hash: md5
+ md5: 39dc7512b1d19fea54550b080d880153
+ size: 27
+ - path: conf/scorers/default.yaml
+ hash: md5
+ md5: d8d00e7d284ea68b1244743dfef8f00c
+ size: 280
+ outs:
+ - path: params.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ train:
+ cmd: python -m deckard.layers.experiment train
+ deps:
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ - path: raw_data/
+ hash: md5
+ md5: 33d46673e0631bef98be9e8991ed1ed1.dir
+ size: 50328647
+ nfiles: 8
params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
+ outs:
+ - path: kdd_nsl/reports/train/default/predictions.json
+ hash: md5
+ md5: 986d2f0abe9b96253b196a222a550609
+ size: 702
+ - path: kdd_nsl/reports/train/default/score_dict.json
+ hash: md5
+ md5: 492e1219d803759a686caa2859c91d21
+ size: 485
+ test_each_model@gzip-gzip_logistic-sms_spam-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20
+ files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam
+ model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1
+ hydra.run.dir=sms_spam/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True '
+ deps:
+ - path: kdd_nsl/reports/train/default/score_dict.json
+ hash: md5
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
+ - path: params.yaml
+ hash: md5
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: ddos/logs/gzip_svc/20
+ - path: sms_spam/logs/test_each_model/gzip_logistic/gzip/20
hash: md5
- md5: 5934a7b63c96844a0eaa9ecea06a79c2.dir
- size: 1639820
- nfiles: 514
- - path: ddos/reports/gzip_svc/20/train/
+ md5: d121a07eb6c0e96c7cd18fe1f2d0fbd6.dir
+ size: 7950
+ nfiles: 4
+ - path: sms_spam/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json
hash: md5
- md5: 0e902831c38cc7b2f2b03d7bb7f4f5cf.dir
- size: 1580188
- nfiles: 1536
- grid_search@100-kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_knn-ddos-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20
+ files.directory=ddos data=ddos data.sample.train_size=20 dataset=ddos model=gzip_knn
+ model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=ddos/logs/test_each_model/gzip_knn/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/gzip_knn.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: kdd_nsl/logs/gzip_knn/100
+ - path: ddos/logs/test_each_model/gzip_knn/gzip/20
hash: md5
- md5: aa2209bce9b2f829ca22f244b53ed58f.dir
- size: 1416182
- nfiles: 514
- - path: kdd_nsl/reports/gzip_knn/100/train/
+ md5: 3a4d1598b93a5a00ffd486b26a568475.dir
+ size: 7826
+ nfiles: 4
+ - path: ddos/reports/test_each_model/gzip_knn/gzip/20/score_dict.json
hash: md5
- md5: 1547fa66fbaac37a7badef9b300577a7.dir
- size: 1163933
- nfiles: 1000
- grid_search@100-kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_svc-sms_spam-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20
+ files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam
+ model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=sms_spam/logs/test_each_model/gzip_svc/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/gzip_logistic.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: kdd_nsl/logs/gzip_logistic/100
+ - path: sms_spam/logs/test_each_model/gzip_svc/gzip/20
hash: md5
- md5: b6e7cf1d3984f8029177576f9668944b.dir
- size: 1609157
- nfiles: 514
- - path: kdd_nsl/reports/gzip_logistic/100/train/
+ md5: ac59a56d56834986ab013ff5cb6b4448.dir
+ size: 7861
+ nfiles: 4
+ - path: sms_spam/reports/test_each_model/gzip_svc/gzip/20/score_dict.json
hash: md5
- md5: d40db4814c403a903c7d0cd2a8a5bb7b.dir
- size: 1329546
- nfiles: 1093
- grid_search@100-kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_knn-sms_spam-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20
+ files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam
+ model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=sms_spam/logs/test_each_model/gzip_knn/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/gzip_svc.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: kdd_nsl/logs/gzip_svc/100
+ - path: sms_spam/logs/test_each_model/gzip_knn/gzip/20
hash: md5
- md5: 4b96e2a3bb0e0d230ebd96591a16e441.dir
- size: 1553624
- nfiles: 514
- - path: kdd_nsl/reports/gzip_svc/100/train/
+ md5: 4eaee5c6d9a4ad7d474938026f330e8c.dir
+ size: 7858
+ nfiles: 4
+ - path: sms_spam/reports/test_each_model/gzip_knn/gzip/20/score_dict.json
hash: md5
- md5: 3cf8a86de1026ead8fcd1b6cda47e910.dir
- size: 1247698
- nfiles: 1152
- grid_search@100-truthseeker-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv
- files.directory=truthseeker files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_svc-truthseeker-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20
+ files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker
+ model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=truthseeker/logs/test_each_model/gzip_svc/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/gzip_knn.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: truthseeker/logs/gzip_knn/100
- hash: md5
- md5: 818cba0a8349442987e5d6be1f0672d4.dir
- size: 1374869
- nfiles: 514
- - path: truthseeker/reports/gzip_knn/100/train/
- hash: md5
- md5: 261a37d5d497bd477d872aa72a94a13f.dir
- size: 394446
- nfiles: 320
- grid_search@100-truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv
- files.directory=truthseeker files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
- deps:
- - path: conf/gzip_logistic.yaml
- hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_logistic.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
- outs:
- - path: truthseeker/logs/gzip_logistic/100
+ - path: truthseeker/logs/test_each_model/gzip_svc/gzip/20
hash: md5
- md5: dd822b92438871be421644a82afa8e2f.dir
- size: 1528739
- nfiles: 514
- - path: truthseeker/reports/gzip_logistic/100/train/
+ md5: 5fb0774e1c5387d988a28d68900d7d02.dir
+ size: 7924
+ nfiles: 4
+ - path: truthseeker/reports/test_each_model/gzip_svc/gzip/20/score_dict.json
hash: md5
- md5: d1b22149466a949b86aba9390d7cf992.dir
- size: 556386
- nfiles: 365
- grid_search@100-truthseeker-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv
- files.directory=truthseeker files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_logistic-kdd_nsl-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20
+ files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
+ model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1
+ hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True '
deps:
- - path: conf/gzip_svc.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: truthseeker/logs/gzip_svc/100
+ - path: kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20
hash: md5
- md5: c9493ae71545ccec0ea01adc6d664bce.dir
- size: 1505603
- nfiles: 514
- - path: truthseeker/reports/gzip_svc/100/train/
+ md5: ec6c44a8421f7cb02994bafbb0ceb59d.dir
+ size: 7980
+ nfiles: 4
+ - path: kdd_nsl/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json
hash: md5
- md5: c9a4bae4aed04fcdb578f44fba94af87.dir
- size: 547282
- nfiles: 384
- grid_search@100-sms_spam-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv
- files.directory=sms_spam files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_logistic-truthseeker-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20
+ files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker
+ model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1
+ hydra.run.dir=truthseeker/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True '
deps:
- - path: conf/gzip_knn.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: sms_spam/logs/gzip_knn/100
- hash: md5
- md5: ad8714bbbce96d2c1ff75deda0add5ec.dir
- size: 1415136
- nfiles: 514
- - path: sms_spam/reports/gzip_knn/100/train/
- hash: md5
- md5: 6bcf048da228e84a757916c797891044.dir
- size: 376546
- nfiles: 331
- find_best_model@ddos-gzip_knn:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_knn_ddos --config_subdir model --params_file best_gzip_knn_ddos --default_config
- gzip_knn
- deps:
- - path: ddos/logs/gzip_knn/
- hash: md5
- md5: d2c6441e85e3509b8968240a48196d07.dir
- size: 4193267
- nfiles: 1542
- outs:
- - path: conf/model/best_gzip_knn_ddos.yaml
- hash: md5
- md5: bdea475d3a2bc59106f27dccd0fc27fc
- size: 419
- find_best_model@ddos-gzip_svc:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_svc_ddos --config_subdir model --params_file best_gzip_svc_ddos --default_config
- gzip_svc
- deps:
- - path: ddos/logs/gzip_svc/
- hash: md5
- md5: 78cd23f301a93a7c9842abb061e3cc7b.dir
- size: 7447727
- nfiles: 2570
- outs:
- - path: conf/model/best_gzip_svc_ddos.yaml
- hash: md5
- md5: 3a7f27dd470ec9e55c10403814f550f2
- size: 442
- find_best_model@ddos-gzip_logistic:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_logistic_ddos --config_subdir model --params_file best_gzip_logistic_ddos
- --default_config gzip_logistic
- deps:
- - path: ddos/logs/gzip_logistic/
+ - path: truthseeker/logs/test_each_model/gzip_logistic/gzip/20
hash: md5
- md5: b28cadbd10b9bbe40802e39b1beaee18.dir
- size: 6561328
- nfiles: 2056
- outs:
- - path: conf/model/best_gzip_logistic_ddos.yaml
+ md5: 2ade09315cc26a4d65dbc22a657bfdec.dir
+ size: 8013
+ nfiles: 4
+ - path: truthseeker/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json
hash: md5
- md5: d5e603d6386dd6cf1167088eaecbdde5
- size: 498
- condense@ddos-knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
- data.sample.test_size=100 model_name=condensed_knn model=gzip_knn files.directory=ddos
- files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_ddos
- hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/knn/
- hydra.callbacks.study_dump.output_file=ddos/logs/knn/study.csv hydra.launcher.n_jobs=-1
- --config-name condense_knn --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_svc-kdd_nsl-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20
+ files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl
+ model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_svc/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/condense_knn.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: abd25d17a742e467d39dda34b448ba88
- size: 2181
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- direction: ${direction}
- max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: ddos/logs/condense/knn/
+ - path: kdd_nsl/logs/test_each_model/gzip_svc/gzip/20
hash: md5
- md5: 34f8b7196af71d106965513050a254fb.dir
- size: 10910937
- nfiles: 4097
- - path: ddos/reports/condense/knn/
+ md5: 80e1fe29c22203d01027107088979db9.dir
+ size: 7891
+ nfiles: 4
+ - path: kdd_nsl/reports/test_each_model/gzip_svc/gzip/20/score_dict.json
hash: md5
- md5: 9b6918814be3bea732abc71b8684fd8d.dir
- size: 8458502
- nfiles: 9157
- condense@ddos-svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
- data.sample.test_size=100 model_name=condensed_svc model=gzip_svc files.directory=ddos
- files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_ddos
- hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/svc/
- hydra.callbacks.study_dump.output_file=ddos/logs/svc/study.csv hydra.launcher.n_jobs=-1
- --config-name condense_svc --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ test_each_model@gzip-gzip_knn-truthseeker-20:
+ cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20
+ files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker
+ model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=truthseeker/logs/test_each_model/gzip_knn/gzip/20
+ ++raise_exception=True '
deps:
- - path: conf/model/best_gzip_svc_ddos.yaml
+ - path: kdd_nsl/reports/train/default/score_dict.json
hash: md5
- md5: 3a7f27dd470ec9e55c10403814f550f2
- size: 442
- - path: ddos/logs/method/
+ md5: ee4344da4a735fb0b6e6d2cf83ddef6e
+ size: 484
+ - path: params.yaml
hash: md5
- md5: a09dd0467b0e8a142d6f32a38f205159.dir
- size: 59399
- nfiles: 28
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/condense/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
- direction: ${direction}
- max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
+ params.yaml:
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ dataset: kdd_nsl
+ device_id: cpu
+ files:
+ _target_: deckard.base.files.FileConfig
+ data_dir: data
+ data_type: .csv
+ directory: kdd_nsl
+ model_dir: model
+ name: default
+ params_file: params.yaml
+ predictions_file: predictions.json
+ reports: reports
+ score_dict_file: score_dict.json
+ model:
+ _target_: deckard.base.model.Model
+ data:
+ _target_: deckard.base.data.Data
+ drop:
+ - id
+ name: raw_data/kdd_nsl_undersampled_5000.csv
+ sample:
+ _target_: deckard.base.data.SklearnDataSampler
+ random_state: 0
+ stratify: true
+ test_size: 100
+ train_size: 100
+ target: label
+ init:
+ _target_: deckard.base.model.ModelInitializer
+ distance_matrix: kdd_nsl/model/gzip/100-100/0.npz
+ k: 1
+ m: -1
+ metric: gzip
+ name: gzip_classifier.GzipKNN
+ symmetric: false
+ library: sklearn
+ model_name: gzip_knn
+ scorers:
+ _target_: deckard.base.scorer.ScorerDict
+ accuracy:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: maximize
+ name: sklearn.metrics.accuracy_score
+ log_loss:
+ _target_: deckard.base.scorer.ScorerConfig
+ direction: minimize
+ name: sklearn.metrics.log_loss
outs:
- - path: ddos/logs/condense/svc/
+ - path: truthseeker/logs/test_each_model/gzip_knn/gzip/20
hash: md5
- md5: 6a15cfc205c7382b8d7d6d67d35ddfb0.dir
- size: 11072739
- nfiles: 4097
- - path: ddos/reports/condense/svc/
+ md5: e1b4842686f73992f04e9104eab3e88f.dir
+ size: 7921
+ nfiles: 4
+ - path: truthseeker/reports/test_each_model/gzip_knn/gzip/20/score_dict.json
hash: md5
- md5: daaf428c939e9bfcc233bf88ee39f9fb.dir
- size: 2819182
- nfiles: 3072
- condense@ddos-logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
- data.sample.test_size=100 model_name=condensed_logistic model=gzip_logistic
- files.directory=ddos files.reports=reports/condense/logistic/ hydra.sweeper.study_name=condense_logistic_ddos
- hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/logistic/
- hydra.callbacks.study_dump.output_file=ddos/logs/logistic/study.csv hydra.launcher.n_jobs=-1
- --config-name condense_logistic --multirun
+ md5: 5d8bf090bc8e34df8ed01766adfca5eb
+ size: 26
+ grid_search@20-ddos-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
+ data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/20/symmetry_true hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv
+ files.directory=ddos files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/model/best_gzip_logistic_ddos.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: d5e603d6386dd6cf1167088eaecbdde5
- size: 498
- - path: ddos/logs/method/
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
hash: md5
- md5: a09dd0467b0e8a142d6f32a38f205159.dir
- size: 59399
- nfiles: 28
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
subdir: ${hydra.job.num}
@@ -15530,26 +5610,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -15562,32 +5642,34 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: ddos/logs/condense/logistic/
+ - path: ddos/logs/gzip_knn/20/symmetry_true
hash: md5
- md5: 064e5768d0155635c9bc6287914ac9f7.dir
- size: 11690343
- nfiles: 4097
- - path: ddos/reports/condense/logistic/
+ md5: 75a67061f3d261f90a32e2e342a26049.dir
+ size: 1201059
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/20/symmetry_true/train/
hash: md5
- md5: 7ce841278929a90690417685b7c7f143.dir
- size: 5929815
- nfiles: 5888
- grid_search@100-ddos-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
+ md5: 410d4dc9dc529c85056cea27da5fc34f.dir
+ size: 328616
+ nfiles: 369
+ grid_search@20-ddos-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv
- files.directory=ddos files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv files.directory=ddos
+ files.reports=reports/gzip_knn/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -15607,30 +5689,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -15642,34 +5720,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: ddos/logs/gzip_knn/100
+ - path: ddos/logs/gzip_knn/20/symmetry_false
hash: md5
- md5: 41af522bae6f35684d51a90652c37082.dir
- size: 1645388
- nfiles: 514
- - path: ddos/reports/gzip_knn/100/train/
+ md5: 5511994182145eb3145fd3afc672d1a5.dir
+ size: 1200638
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/20/symmetry_false/train/
hash: md5
- md5: b9374a5acb2480c2ed6a35803a344f69.dir
- size: 1341749
- nfiles: 1499
- grid_search@100-ddos-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
+ md5: b507e62340bddb44dd3e66467a23444a.dir
+ size: 328838
+ nfiles: 369
+ grid_search@20-ddos-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_logistic/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv
- files.directory=ddos files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv
+ files.directory=ddos files.reports=reports/gzip_logistic/20/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_logistic --multirun
deps:
- path: conf/gzip_logistic.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_logistic.yaml:
hydra:
@@ -15689,31 +5769,27 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.fit_intercept: True,False
+model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -15727,36 +5803,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: ddos/logs/gzip_logistic/100
+ - path: ddos/logs/gzip_logistic/20/symmetry_true
hash: md5
- md5: 3f1d14c70e73f668316f86a8d7d0e22b.dir
- size: 1733688
- nfiles: 514
- - path: ddos/reports/gzip_logistic/100/train/
+ md5: 7411fc1827bfc3df75c9106a4288ee8d.dir
+ size: 1262132
+ nfiles: 513
+ - path: ddos/reports/gzip_logistic/20/symmetry_true/train/
hash: md5
- md5: c839c1faf70de47c057714c3a8bdc52d.dir
- size: 1562420
- nfiles: 1315
- grid_search@100-ddos-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
- data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv
- files.directory=ddos files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 72358a4a9191f8e02e2d9348e7bfa5be.dir
+ size: 601313
+ nfiles: 356
+ grid_search@20-ddos-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
+ data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv
+ files.directory=ddos files.reports=reports/gzip_logistic/20/symmetry_false hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -15768,37 +5846,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
+ directions: ${direction}
+ metric_names: ${optimizers}
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -15812,159 +5886,77 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
- outs:
- - path: ddos/logs/gzip_svc/100
- hash: md5
- md5: 4adc8c896e06e2d7e8450f2b863b95bf.dir
- size: 1681042
- nfiles: 514
- - path: ddos/reports/gzip_svc/100/train/
- hash: md5
- md5: 8ad9bbb8a118699458753528a263f5ba.dir
- size: 1790102
- nfiles: 1678
- find_best_model@kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_knn_kdd_nsl --config_subdir model --params_file best_gzip_knn_kdd_nsl --default_config
- gzip_knn
- deps:
- - path: kdd_nsl/logs/gzip_knn/
- hash: md5
- md5: 6418750af32f15be9c6f35e0975b3276.dir
- size: 4024441
- nfiles: 1542
outs:
- - path: conf/model/best_gzip_knn_kdd_nsl.yaml
- hash: md5
- md5: f9ad25a19931041146b4b1eab45fda68
- size: 420
- find_best_model@kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_svc_kdd_nsl --config_subdir model --params_file best_gzip_svc_kdd_nsl --default_config
- gzip_svc
- deps:
- - path: kdd_nsl/logs/gzip_svc/
- hash: md5
- md5: 381879c377b6eeccbb9d1aa42f78fec2.dir
- size: 4366326
- nfiles: 1542
- outs:
- - path: conf/model/best_gzip_svc_kdd_nsl.yaml
- hash: md5
- md5: 0542c20ce7b5a74a20d4ab1c38fdf213
- size: 434
- find_best_model@kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_logistic_kdd_nsl --config_subdir model --params_file best_gzip_logistic_kdd_nsl
- --default_config gzip_logistic
- deps:
- - path: kdd_nsl/logs/gzip_logistic/
+ - path: ddos/logs/gzip_logistic/20/symmetry_false
hash: md5
- md5: 34325e24d16a4af0ec3286ec4b034e14.dir
- size: 4504884
- nfiles: 1542
- outs:
- - path: conf/model/best_gzip_logistic_kdd_nsl.yaml
- hash: md5
- md5: e21d828b4b1ad122d7755e986de5b93d
- size: 353
- find_best_model@sms_spam-gzip_knn:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_knn_sms_spam --config_subdir model --params_file best_gzip_knn_sms_spam
- --default_config gzip_knn
- deps:
- - path: sms_spam/logs/gzip_knn/
- hash: md5
- md5: 689c69db8c621101649ddef5bd0c1bb5.dir
- size: 2713750
- nfiles: 1028
- outs:
- - path: conf/model/best_gzip_knn_sms_spam.yaml
- hash: md5
- md5: 41fad710bcb8b8b8dd548d669b2ed748
- size: 419
- find_best_model@sms_spam-gzip_svc:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_svc_sms_spam --config_subdir model --params_file best_gzip_svc_sms_spam
- --default_config gzip_svc
- deps:
- - path: sms_spam/logs/gzip_svc/
+ md5: 49dbe43b3f37ddc7ac2ae83c9022067e.dir
+ size: 1243003
+ nfiles: 513
+ - path: ddos/reports/gzip_logistic/20/symmetry_false/train/
hash: md5
- md5: b91e15f0eb5ee57aed8aeb5a5d6feeab.dir
- size: 2777710
- nfiles: 1028
- outs:
- - path: conf/model/best_gzip_svc_sms_spam.yaml
- hash: md5
- md5: bb3008613c3311a696d32fb683732c00
- size: 442
- find_best_model@sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name
- gzip_logistic_sms_spam --config_subdir model --params_file best_gzip_logistic_sms_spam
- --default_config gzip_logistic
+ md5: 311ef4395865656e00f5428c8f98b19a.dir
+ size: 616599
+ nfiles: 340
+ grid_search@20-ddos-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
+ data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/20/symmetry_true hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv
+ files.directory=ddos files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: sms_spam/logs/gzip_logistic/
- hash: md5
- md5: 89191dbe147b40192129776ef2652900.dir
- size: 1649284
- nfiles: 578
- outs:
- - path: conf/model/best_gzip_logistic_sms_spam.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: fd1d0481be57844d935aea28e995a369
- size: 485
- condense@kdd_nsl-knn:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn
- model=gzip_knn files.directory=kdd_nsl files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_kdd_nsl
- hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/knn/
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/knn/study.csv hydra.launcher.n_jobs=-1
- --config-name condense_knn --multirun
- deps:
- - path: conf/model/best_gzip_knn_kdd_nsl.yaml
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: f9ad25a19931041146b4b1eab45fda68
- size: 420
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -15977,67 +5969,76 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: kdd_nsl/logs/condense/knn/
+ - path: ddos/logs/gzip_svc/20/symmetry_true
hash: md5
- md5: 81f50250e51650881283dcf68d43234c.dir
- size: 10952920
- nfiles: 4097
- - path: kdd_nsl/reports/condense/knn/
+ md5: 51fb64b0b4069b3a551837dd9602b50c.dir
+ size: 1235122
+ nfiles: 513
+ - path: ddos/reports/gzip_svc/20/symmetry_true/train/
hash: md5
- md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir
- size: 2869636
- nfiles: 3011
- condense@kdd_nsl-svc:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc
- model=gzip_svc files.directory=kdd_nsl files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_kdd_nsl
- hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/svc/
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/svc/study.csv hydra.launcher.n_jobs=-1
- --config-name condense_svc --multirun
+ md5: 22b4b6a8d2e3861aedf0e4f43917ba72.dir
+ size: 551301
+ nfiles: 384
+ grid_search@20-ddos-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20
+ data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv files.directory=ddos
+ files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/model/best_gzip_svc_kdd_nsl.yaml
+ - path: conf/gzip_svc.yaml
+ hash: md5
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: 0542c20ce7b5a74a20d4ab1c38fdf213
- size: 434
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16050,33 +6051,39 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: kdd_nsl/logs/condense/svc/
+ - path: ddos/logs/gzip_svc/20/symmetry_false
hash: md5
- md5: cdf319e0c94e4c6eda84ec9b2e9ea1a9.dir
- size: 10708020
- nfiles: 4097
- - path: kdd_nsl/reports/condense/svc/
+ md5: 2440c70c069be012281ec7412d211422.dir
+ size: 1234738
+ nfiles: 513
+ - path: ddos/reports/gzip_svc/20/symmetry_false/train/
hash: md5
- md5: ad27897c6454024915fdcef827219bd3.dir
- size: 8340639
- nfiles: 5462
- condense@kdd_nsl-logistic:
+ md5: 83c44eacdc2b26fd6264cfb781ea7c54.dir
+ size: 551571
+ nfiles: 384
+ grid_search@20-kdd_nsl-gzip_knn-true:
cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic
- model=gzip_logistic files.directory=kdd_nsl files.reports=reports/condense/logistic/
- hydra.sweeper.study_name=condense_logistic_kdd_nsl hydra.sweeper.n_trials=1024
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/logistic/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/model/best_gzip_logistic_kdd_nsl.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: e21d828b4b1ad122d7755e986de5b93d
- size: 353
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
+ hash: md5
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
subdir: ${hydra.job.num}
@@ -16091,26 +6098,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16123,33 +6130,39 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: kdd_nsl/logs/condense/logistic/
+ - path: kdd_nsl/logs/gzip_knn/20/symmetry_true
hash: md5
- md5: 0ce56c12dc58fe66c1fa6fec867b2cf5.dir
- size: 11710344
- nfiles: 4097
- - path: kdd_nsl/reports/condense/logistic/
+ md5: 677d1cdd68cb84a67d83107fc6925c3c.dir
+ size: 1196876
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_knn/20/symmetry_true/train/
hash: md5
- md5: ae358823518ca6759ddfa8d1c738e367.dir
- size: 3101125
- nfiles: 2948
- condense@truthseeker-knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn
- model=gzip_knn files.directory=truthseeker files.reports=reports/condense/knn/
- hydra.sweeper.study_name=condense_knn_truthseeker hydra.sweeper.n_trials=1024
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/knn/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun
+ md5: bb50d06bc8b2fd621dd0a417273884cc.dir
+ size: 341291
+ nfiles: 356
+ grid_search@20-kdd_nsl-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_knn/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/model/best_gzip_knn_truthseeker.yaml
+ - path: conf/gzip_knn.yaml
+ hash: md5
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
hash: md5
- md5: 79baf4709c4a5f2535059ef8d1b6a082
- size: 258
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
subdir: ${hydra.job.num}
@@ -16164,26 +6177,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16196,36 +6209,43 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: truthseeker/logs/condense/knn/
+ - path: kdd_nsl/logs/gzip_knn/20/symmetry_false
hash: md5
- md5: 3e8b9011ee1c591904115e67db9a1a50.dir
- size: 11038890
- nfiles: 4097
- - path: truthseeker/reports/condense/knn/
+ md5: 8876b4cdea08cacd9fabea8b7c7e339b.dir
+ size: 1180969
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_knn/20/symmetry_false/train/
hash: md5
- md5: 1565eb2348976cc6ac9108396141080b.dir
- size: 2831604
- nfiles: 3016
- condense@truthseeker-svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc
- model=gzip_svc files.directory=truthseeker files.reports=reports/condense/svc/
- hydra.sweeper.study_name=condense_svc_truthseeker hydra.sweeper.n_trials=1024
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/svc/ hydra.callbacks.study_dump.output_file=truthseeker/logs/svc/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun
+ md5: 8635540eb47bb367dbac1b7d6d83afde.dir
+ size: 371913
+ nfiles: 345
+ grid_search@20-kdd_nsl-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_kdd_nsl
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv
+ files.directory=kdd_nsl files.reports=reports/gzip_logistic/20/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/model/best_gzip_svc_truthseeker.yaml
+ - path: conf/gzip_logistic.yaml
+ hash: md5
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
hash: md5
- md5: 97d9d5857744b1cc077513ac5a659f62
- size: 302
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -16237,26 +6257,29 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16269,36 +6292,43 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: truthseeker/logs/condense/svc/
+ - path: kdd_nsl/logs/gzip_logistic/20/symmetry_true
hash: md5
- md5: 845724e35dc3a54bea549410a35d6afd.dir
- size: 11192018
- nfiles: 4097
- - path: truthseeker/reports/condense/svc/
+ md5: 4752da5c6f9e5b19ffa7b85fedaa864d.dir
+ size: 1271405
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_logistic/20/symmetry_true/train/
hash: md5
- md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir
- size: 2825163
- nfiles: 3064
- condense@truthseeker-logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic
- model=gzip_logistic files.directory=truthseeker files.reports=reports/condense/logistic/
- hydra.sweeper.study_name=condense_logistic_truthseeker hydra.sweeper.n_trials=1024
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=truthseeker/logs/logistic/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun
+ md5: b2fc29717a0256771a595e81e77363c9.dir
+ size: 604610
+ nfiles: 356
+ grid_search@20-kdd_nsl-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_kdd_nsl
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv
+ files.directory=kdd_nsl files.reports=reports/gzip_logistic/20/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/model/best_gzip_logistic_truthseeker.yaml
+ - path: conf/gzip_logistic.yaml
+ hash: md5
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
hash: md5
- md5: 448e12c542f48c074057e9374743d61e
- size: 326
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -16310,26 +6340,29 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16342,71 +6375,76 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: truthseeker/logs/condense/logistic/
+ - path: kdd_nsl/logs/gzip_logistic/20/symmetry_false
hash: md5
- md5: f7e754346e500d1b007b519d86f4c608.dir
- size: 11847643
- nfiles: 4097
- - path: truthseeker/reports/condense/logistic/
+ md5: 24f796fd29b950df2c9d7eb53db47cd2.dir
+ size: 1260414
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_logistic/20/symmetry_false/train/
hash: md5
- md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir
- size: 2976098
- nfiles: 3011
- condense@sms_spam-knn:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn
- model=gzip_knn files.directory=sms_spam files.reports=reports/condense/knn/
- hydra.sweeper.study_name=condense_knn_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/condense/knn/ hydra.callbacks.study_dump.output_file=sms_spam/logs/knn/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun
+ md5: 6f0315fbb05852baa48643f06ed318ad.dir
+ size: 611076
+ nfiles: 347
+ grid_search@20-kdd_nsl-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/condense_knn.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: abd25d17a742e467d39dda34b448ba88
- size: 2181
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16419,71 +6457,76 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: sms_spam/logs/condense/knn/
+ - path: kdd_nsl/logs/gzip_svc/20/symmetry_true
hash: md5
- md5: ee1eda16b8989f2a23a7dfeba27b4437.dir
- size: 10519093
- nfiles: 4097
- - path: sms_spam/reports/condense/knn/
+ md5: 0cbe34f36b1aacc6101ec1d3d6d878eb.dir
+ size: 1244608
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_svc/20/symmetry_true/train/
hash: md5
- md5: 84b8fcb1e78a8685141409736c6d6afa.dir
- size: 4713599
- nfiles: 4258
- condense@sms_spam-svc:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc
- model=gzip_svc files.directory=sms_spam files.reports=reports/condense/svc/
- hydra.sweeper.study_name=condense_svc_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/condense/svc/ hydra.callbacks.study_dump.output_file=sms_spam/logs/svc/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun
+ md5: 0ea5d4be51518781035dd7e85b700732.dir
+ size: 554635
+ nfiles: 384
+ grid_search@20-kdd_nsl-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/condense_svc.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 7a311db45e697a23a2bed8180fd45e64
- size: 2182
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
+ study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
direction: ${direction}
max_failure_rate: 1.0
- params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -16496,37 +6539,39 @@ stages:
max_nbytes: 100000
mmap_mode: r
outs:
- - path: sms_spam/logs/condense/svc/
+ - path: kdd_nsl/logs/gzip_svc/20/symmetry_false
hash: md5
- md5: 9d28ee3f4494d207369bd35c2f5d2164.dir
- size: 11082621
- nfiles: 4097
- - path: sms_spam/reports/condense/svc/
+ md5: 9eba5cbbd68553f794dec337e9606f52.dir
+ size: 1244184
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_svc/20/symmetry_false/train/
hash: md5
- md5: 200cad31398ec4545e7a490011218c47.dir
- size: 4416840
- nfiles: 3068
- condense@sms_spam-logistic:
+ md5: dc18ba1e036d9b6678d4b97070d84c3c.dir
+ size: 554884
+ nfiles: 384
+ grid_search@20-sms_spam-gzip_knn-true:
cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic
- model=gzip_logistic files.directory=sms_spam files.reports=reports/condense/logistic/
- hydra.sweeper.study_name=condense_logistic_sms_spam hydra.sweeper.n_trials=1024
- hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=sms_spam/logs/logistic/study.csv
- hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv files.directory=sms_spam
+ files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/condense_logistic.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 85b6d1d835afd7e95b5b9f804fbd7119
- size: 2326
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/condense.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
- dir: ${dataset}/logs/condense/
+ dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
subdir: ${hydra.job.num}
@@ -16541,752 +6586,967 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}
- storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
- ++data.sample.train_size: 1000
- ++data.sample.random_state: int(interval(10000, 20000))
- model.init.m: tag(log, interval(.01, .1))
- +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
prefer: processes
verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- outs:
- - path: sms_spam/logs/condense/logistic/
- hash: md5
- md5: 3846050e3a2341b246c2c3366debe0dc.dir
- size: 11620551
- nfiles: 4097
- - path: sms_spam/reports/condense/logistic/
- hash: md5
- md5: 05562ae582796b70d35ae7062a5030d7.dir
- size: 9597627
- nfiles: 6388
- compile@sms_spam-condense/logistic:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/logistic --results_file
- sms_spam/reports/condense/logistic.csv
- deps:
- - path: sms_spam/reports/condense/logistic/
- hash: md5
- md5: 05562ae582796b70d35ae7062a5030d7.dir
- size: 9597627
- nfiles: 6388
- outs:
- - path: sms_spam/reports/condense/logistic.csv
- hash: md5
- md5: 7094b26a582820cc1f88512573ce8c25
- size: 3430438
- compile@kdd_nsl-condense/svc:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/svc --results_file
- kdd_nsl/reports/condense/svc.csv
- deps:
- - path: kdd_nsl/reports/condense/svc/
- hash: md5
- md5: ad27897c6454024915fdcef827219bd3.dir
- size: 8340639
- nfiles: 5462
- outs:
- - path: kdd_nsl/reports/condense/svc.csv
- hash: md5
- md5: 643a67cb6d5974a787efa6339e3af058
- size: 3003804
- compile@kdd_nsl-condense/logistic:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/logistic --results_file
- kdd_nsl/reports/condense/logistic.csv
- deps:
- - path: kdd_nsl/reports/condense/logistic/
- hash: md5
- md5: df73404e3f7d00371dd55b40e76fa9e0.dir
- size: 3112185
- nfiles: 2954
- outs:
- - path: kdd_nsl/reports/condense/logistic.csv
- hash: md5
- md5: 4193461c63aca8b61956fc443f5bcd3d
- size: 1649004
- compile@ddos-condense/svc:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/svc --results_file
- ddos/reports/condense/svc.csv
- deps:
- - path: ddos/reports/condense/svc/
- hash: md5
- md5: b40b878f7eca11a9eae0c19e054bee47.dir
- size: 8854939
- nfiles: 7199
- outs:
- - path: ddos/reports/condense/svc.csv
- hash: md5
- md5: 76b35c3e1dfa2d0476a737f9a41c25c4
- size: 3771755
- compile@truthseeker-condense/knn:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/knn --results_file
- truthseeker/reports/condense/knn.csv
- deps:
- - path: truthseeker/reports/condense/knn/
- hash: md5
- md5: 1565eb2348976cc6ac9108396141080b.dir
- size: 2831604
- nfiles: 3016
- outs:
- - path: truthseeker/reports/condense/knn.csv
- hash: md5
- md5: b4ec50d98f613984be6261a059120255
- size: 1595839
- compile@truthseeker-condense/svc:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/svc --results_file
- truthseeker/reports/condense/svc.csv
- deps:
- - path: truthseeker/reports/condense/svc/
- hash: md5
- md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir
- size: 2825163
- nfiles: 3064
- outs:
- - path: truthseeker/reports/condense/svc.csv
- hash: md5
- md5: 4cdede4407c88bcda2afc8bbeae91ace
- size: 1617655
- compile@ddos-condense/knn:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/knn --results_file
- ddos/reports/condense/knn.csv
- deps:
- - path: ddos/reports/condense/knn/
- hash: md5
- md5: 9b6918814be3bea732abc71b8684fd8d.dir
- size: 8458502
- nfiles: 9157
- outs:
- - path: ddos/reports/condense/knn.csv
- hash: md5
- md5: 0cd0ff58f94fb06093779ff81d37d2bf
- size: 4723182
- compile@sms_spam-condense/svc:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/svc --results_file
- sms_spam/reports/condense/svc.csv
- deps:
- - path: sms_spam/reports/condense/svc/
- hash: md5
- md5: 200cad31398ec4545e7a490011218c47.dir
- size: 4416840
- nfiles: 3068
- outs:
- - path: sms_spam/reports/condense/svc.csv
- hash: md5
- md5: 32f06cbea623f845dcfa7400d707abad
- size: 1573621
- compile@kdd_nsl-condense/knn:
- cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/knn --results_file
- kdd_nsl/reports/condense/knn.csv
- deps:
- - path: kdd_nsl/reports/condense/knn/
- hash: md5
- md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir
- size: 2869636
- nfiles: 3011
- outs:
- - path: kdd_nsl/reports/condense/knn.csv
- hash: md5
- md5: 29211ec6d9b2b1a5e9193eaabfff3488
- size: 1608857
- compile@truthseeker-condense/logistic:
- cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/logistic --results_file
- truthseeker/reports/condense/logistic.csv
- deps:
- - path: truthseeker/reports/condense/logistic/
- hash: md5
- md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir
- size: 2976098
- nfiles: 3011
- outs:
- - path: truthseeker/reports/condense/logistic.csv
- hash: md5
- md5: 5c01852f352ac96150fb36c2df9bcbbf
- size: 1648856
- compile@sms_spam-condense/knn:
- cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/knn --results_file
- sms_spam/reports/condense/knn.csv
- deps:
- - path: sms_spam/reports/condense/knn/
- hash: md5
- md5: 84b8fcb1e78a8685141409736c6d6afa.dir
- size: 4713599
- nfiles: 4258
- outs:
- - path: sms_spam/reports/condense/knn.csv
- hash: md5
- md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc
- size: 2287605
- compile@ddos-condense/logistic:
- cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/logistic --results_file
- ddos/reports/condense/logistic.csv
- deps:
- - path: ddos/reports/condense/logistic/
- hash: md5
- md5: 7ce841278929a90690417685b7c7f143.dir
- size: 5929815
- nfiles: 5888
- outs:
- - path: ddos/reports/condense/logistic.csv
- hash: md5
- md5: b24764aed957fdf6d2ccb541ef490d37
- size: 3150984
- clean@sms_spam-condense/svc:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/svc.csv
- -o sms_spam/plots/clean/condense/svc.csv -c conf/clean.yaml
- deps:
- - path: sms_spam/reports/condense/svc.csv
- hash: md5
- md5: 32f06cbea623f845dcfa7400d707abad
- size: 1573621
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: sms_spam/plots/clean/condense/svc.csv
- hash: md5
- md5: 92b8648f6759e0a56c65aeec4a15aa92
- size: 1223675
- clean@ddos-condense/knn:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/knn.csv -o
- ddos/plots/clean/condense/knn.csv -c conf/clean.yaml
- deps:
- - path: ddos/reports/condense/knn.csv
- hash: md5
- md5: 0cd0ff58f94fb06093779ff81d37d2bf
- size: 4723182
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: ddos/plots/clean/condense/knn.csv
- hash: md5
- md5: d214914ecfbba6afbd4ff9a61cb96bb1
- size: 3652514
- clean@truthseeker-condense/svc:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/svc.csv
- -o truthseeker/plots/clean/condense/svc.csv -c conf/clean.yaml
- deps:
- - path: truthseeker/reports/condense/svc.csv
- hash: md5
- md5: 4cdede4407c88bcda2afc8bbeae91ace
- size: 1617655
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: truthseeker/plots/clean/condense/svc.csv
- hash: md5
- md5: a17c0cdb6a3fbfae5bd4fcfca1938a96
- size: 1257671
- clean@kdd_nsl-condense/knn:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/knn.csv
- -o kdd_nsl/plots/clean/condense/knn.csv -c conf/clean.yaml
- deps:
- - path: kdd_nsl/reports/condense/knn.csv
- hash: md5
- md5: 29211ec6d9b2b1a5e9193eaabfff3488
- size: 1608857
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: kdd_nsl/plots/clean/condense/knn.csv
- hash: md5
- md5: 23789b08b0fd1616555611d0e7971db9
- size: 1204868
- clean@kdd_nsl-condense/svc:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/svc.csv
- -o kdd_nsl/plots/clean/condense/svc.csv -c conf/clean.yaml
- deps:
- - path: kdd_nsl/reports/condense/svc.csv
+ - path: sms_spam/logs/gzip_knn/20/symmetry_true
hash: md5
- md5: 643a67cb6d5974a787efa6339e3af058
- size: 3003804
- params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
- outs:
- - path: kdd_nsl/plots/clean/condense/svc.csv
+ md5: b900fa95011e3c9620f9a7103baa47a1.dir
+ size: 1193555
+ nfiles: 513
+ - path: sms_spam/reports/gzip_knn/20/symmetry_true/train/
hash: md5
- md5: c9b2ff8546f531fa439c664c63fc06fd
- size: 2021393
- clean@kdd_nsl-condense/logistic:
- cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/logistic.csv
- -o kdd_nsl/plots/clean/condense/logistic.csv -c conf/clean.yaml
+ md5: 0c2256ed804059b75873b27f8963204e.dir
+ size: 329514
+ nfiles: 356
+ grid_search@20-sms_spam-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv files.directory=sms_spam
+ files.reports=reports/gzip_knn/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: kdd_nsl/reports/condense/logistic.csv
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 4193461c63aca8b61956fc443f5bcd3d
- size: 1649004
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
+ hash: md5
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_knn.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.num}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
+ params:
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: kdd_nsl/plots/clean/condense/logistic.csv
+ - path: sms_spam/logs/gzip_knn/20/symmetry_false
hash: md5
- md5: 55a0ac50149a3e3d93b69c63ccd0d7a3
- size: 1174964
- clean@sms_spam-condense/knn:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/knn.csv
- -o sms_spam/plots/clean/condense/knn.csv -c conf/clean.yaml
+ md5: 0554269057beb85cd3746813652ba9d5.dir
+ size: 1191491
+ nfiles: 513
+ - path: sms_spam/reports/gzip_knn/20/symmetry_false/train/
+ hash: md5
+ md5: e25f72d029f72432d5c9a5ffacec0208.dir
+ size: 341814
+ nfiles: 356
+ grid_search@20-sms_spam-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_sms_spam
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_logistic/20/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: sms_spam/reports/condense/knn.csv
+ - path: conf/gzip_logistic.yaml
+ hash: md5
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
hash: md5
- md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc
- size: 2287605
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_logistic.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: sms_spam/plots/clean/condense/knn.csv
+ - path: sms_spam/logs/gzip_logistic/20/symmetry_true
hash: md5
- md5: 7dda620e8ae59aab14ac83c0071a8b96
- size: 1268504
- clean@sms_spam-condense/logistic:
- cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/logistic.csv
- -o sms_spam/plots/clean/condense/logistic.csv -c conf/clean.yaml
+ md5: b95404e2e4b0a957a788e82f65a49a10.dir
+ size: 1268014
+ nfiles: 513
+ - path: sms_spam/reports/gzip_logistic/20/symmetry_true/train/
+ hash: md5
+ md5: b2333589409b837e4233aa2fb7cded97.dir
+ size: 592315
+ nfiles: 356
+ grid_search@20-sms_spam-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_sms_spam
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_logistic/20/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: sms_spam/reports/condense/logistic.csv
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 7094b26a582820cc1f88512573ce8c25
- size: 3430438
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
+ hash: md5
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_logistic.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: sms_spam/plots/clean/condense/logistic.csv
+ - path: sms_spam/logs/gzip_logistic/20/symmetry_false
hash: md5
- md5: 1f89cfa87c87f195079e49eb5d6e7ce5
- size: 2461824
- clean@truthseeker-condense/logistic:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/logistic.csv
- -o truthseeker/plots/clean/condense/logistic.csv -c conf/clean.yaml
+ md5: 9d4569ebac94dccb57a6d50c04fd2b1c.dir
+ size: 1252292
+ nfiles: 513
+ - path: sms_spam/reports/gzip_logistic/20/symmetry_false/train/
+ hash: md5
+ md5: a4a3af08dfca0a0ba5b94bb0a9ea735a.dir
+ size: 603823
+ nfiles: 343
+ grid_search@20-sms_spam-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv files.directory=sms_spam
+ files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: truthseeker/reports/condense/logistic.csv
+ - path: conf/gzip_svc.yaml
+ hash: md5
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: 5c01852f352ac96150fb36c2df9bcbbf
- size: 1648856
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_svc.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: truthseeker/plots/clean/condense/logistic.csv
+ - path: sms_spam/logs/gzip_svc/20/symmetry_true
hash: md5
- md5: 9710addb440069a5ea884d90ed4c394a
- size: 1237939
- clean@truthseeker-condense/knn:
- cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/knn.csv
- -o truthseeker/plots/clean/condense/knn.csv -c conf/clean.yaml
+ md5: 97f387456af594e96fe70ae39cfe8018.dir
+ size: 1241267
+ nfiles: 513
+ - path: sms_spam/reports/gzip_svc/20/symmetry_true/train/
+ hash: md5
+ md5: aa3a7443b115c46ce08aa7a70a7fb77c.dir
+ size: 542327
+ nfiles: 384
+ grid_search@20-sms_spam-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv files.directory=sms_spam
+ files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: truthseeker/reports/condense/knn.csv
+ - path: conf/gzip_svc.yaml
+ hash: md5
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: b4ec50d98f613984be6261a059120255
- size: 1595839
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_svc.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: truthseeker/plots/clean/condense/knn.csv
+ - path: sms_spam/logs/gzip_svc/20/symmetry_false
hash: md5
- md5: a0c8deb8fe7617477ec43fae2a851b4d
- size: 1191230
- clean@ddos-condense/svc:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/svc.csv -o
- ddos/plots/clean/condense/svc.csv -c conf/clean.yaml
+ md5: dccf212ddba8d745daa30ce1c9efd0b1.dir
+ size: 1240872
+ nfiles: 513
+ - path: sms_spam/reports/gzip_svc/20/symmetry_false/train/
+ hash: md5
+ md5: 923ea8186f9d9630e26fa0da18e03508.dir
+ size: 542578
+ nfiles: 384
+ grid_search@20-truthseeker-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_knn/20/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: ddos/reports/condense/svc.csv
+ - path: conf/gzip_knn.yaml
+ hash: md5
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
hash: md5
- md5: 76b35c3e1dfa2d0476a737f9a41c25c4
- size: 3771755
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_knn.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.num}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
+ params:
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: ddos/plots/clean/condense/svc.csv
+ - path: truthseeker/logs/gzip_knn/20/symmetry_true
hash: md5
- md5: 102b712883464d547a4d2119f6c5df60
- size: 2968961
- clean@ddos-condense/logistic:
- cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/logistic.csv
- -o ddos/plots/clean/condense/logistic.csv -c conf/clean.yaml
+ md5: a98ed7354eb47190c6301eb889704388.dir
+ size: 1206224
+ nfiles: 513
+ - path: truthseeker/reports/gzip_knn/20/symmetry_true/train/
+ hash: md5
+ md5: ad20e69c6454627f1483726b0cc91365.dir
+ size: 331035
+ nfiles: 359
+ grid_search@20-truthseeker-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_knn/20/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: ddos/reports/condense/logistic.csv
+ - path: conf/gzip_knn.yaml
+ hash: md5
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
hash: md5
- md5: b24764aed957fdf6d2ccb541ef490d37
- size: 3150984
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/clean.yaml:
- replace:
- model.init.metric:
- jaro: Jaro
- _winkler: -Winkler
- levenshtein: Levenshtein
- ncd: NCD
- ratio: Ratio
- seqRatio: SeqRatio
- hamming: Hamming
- gzip: Gzip
- pkl: Pickle
- bz2: BZ2
- zstd: Zstd
- lzma: Lzma
- model_name:
- GzipSVC: k-SVC
- GzipLogisticRegressor: k-Logistic
- GzipKNN: k-KNN
- model.init.symmetric:
- true: Symmetric
- false: Asymmetric
+ conf/gzip_knn.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.num}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
+ params:
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: ddos/plots/clean/condense/logistic.csv
+ - path: truthseeker/logs/gzip_knn/20/symmetry_false
hash: md5
- md5: bfca6e865bca11a25fa1e42dfbdea0ad
- size: 2331762
- merge_condense@ddos:
- cmd: python merge.py --big_dir ddos/plots/ --data_file clean/condense/knn.csv
- --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
- ddos/plots/ --output_file condensed_merged.csv
- deps:
- - path: ddos/plots/clean/condense/knn.csv
+ md5: 2617ca5cb1d8ff3905d50915269c6e9f.dir
+ size: 1203425
+ nfiles: 513
+ - path: truthseeker/reports/gzip_knn/20/symmetry_false/train/
hash: md5
- md5: d214914ecfbba6afbd4ff9a61cb96bb1
- size: 3652514
- - path: ddos/plots/clean/condense/logistic.csv
+ md5: 4a06f23a3f742c65df6594ee04759bf8.dir
+ size: 342243
+ nfiles: 358
+ grid_search@20-truthseeker-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_truthseeker
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_logistic/20/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
+ deps:
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: bfca6e865bca11a25fa1e42dfbdea0ad
- size: 2331762
- - path: ddos/plots/clean/condense/svc.csv
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
hash: md5
- md5: 102b712883464d547a4d2119f6c5df60
- size: 2968961
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_logistic.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: ddos/plots/condensed_merged.csv
+ - path: truthseeker/logs/gzip_logistic/20/symmetry_true
hash: md5
- md5: dc147a2e9c585b39c5e212a46ade70ac
- size: 9306964
- merge_condense@kdd_nsl:
- cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/condense/knn.csv
- --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
- kdd_nsl/plots/ --output_file condensed_merged.csv
- deps:
- - path: kdd_nsl/plots/clean/condense/knn.csv
+ md5: ff829c546214f8c48b65d65886826fa3.dir
+ size: 1277433
+ nfiles: 513
+ - path: truthseeker/reports/gzip_logistic/20/symmetry_true/train/
hash: md5
- md5: 23789b08b0fd1616555611d0e7971db9
- size: 1204868
- - path: kdd_nsl/plots/clean/condense/logistic.csv
+ md5: 9fa0a99c495e46db650c6a7a5b520119.dir
+ size: 596142
+ nfiles: 356
+ grid_search@20-truthseeker-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_truthseeker
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_logistic/20/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
+ deps:
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 55a0ac50149a3e3d93b69c63ccd0d7a3
- size: 1174964
- - path: kdd_nsl/plots/clean/condense/svc.csv
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
+ - path: params.yaml
hash: md5
- md5: c9b2ff8546f531fa439c664c63fc06fd
- size: 2021393
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_logistic.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: kdd_nsl/plots/condensed_merged.csv
+ - path: truthseeker/logs/gzip_logistic/20/symmetry_false
hash: md5
- md5: 1ddcee7de7db0c1a7d4898de4a03d7b7
- size: 4543759
- merge_condense@sms_spam:
- cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/condense/knn.csv
- --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
- sms_spam/plots/ --output_file condensed_merged.csv
- deps:
- - path: sms_spam/plots/clean/condense/knn.csv
+ md5: 3236c08228d49f414fb9276f63fd854e.dir
+ size: 1265237
+ nfiles: 513
+ - path: truthseeker/reports/gzip_logistic/20/symmetry_false/train/
hash: md5
- md5: 7dda620e8ae59aab14ac83c0071a8b96
- size: 1268504
- - path: sms_spam/plots/clean/condense/logistic.csv
+ md5: 61c25a8988641a6780633c71c79af7b1.dir
+ size: 603920
+ nfiles: 346
+ grid_search@20-truthseeker-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/20/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_svc/20/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
+ deps:
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 1f89cfa87c87f195079e49eb5d6e7ce5
- size: 2461824
- - path: sms_spam/plots/clean/condense/svc.csv
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: 92b8648f6759e0a56c65aeec4a15aa92
- size: 1223675
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_svc.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: sms_spam/plots/condensed_merged.csv
+ - path: truthseeker/logs/gzip_svc/20/symmetry_true
hash: md5
- md5: 8f549743001ca622a6c7c8cbb2b3d17d
- size: 5114716
- merge_condense@truthseeker:
- cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/condense/knn.csv
- --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder
- truthseeker/plots/ --output_file condensed_merged.csv
- deps:
- - path: truthseeker/plots/clean/condense/knn.csv
+ md5: 80d0c1ade291bb4dbc9af47eddab6d27.dir
+ size: 1250879
+ nfiles: 513
+ - path: truthseeker/reports/gzip_svc/20/symmetry_true/train/
hash: md5
- md5: a0c8deb8fe7617477ec43fae2a851b4d
- size: 1191230
- - path: truthseeker/plots/clean/condense/logistic.csv
+ md5: 913d1664491e029cb3e45e5fa1d9c2b1.dir
+ size: 546189
+ nfiles: 384
+ grid_search@20-truthseeker-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/20/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_svc/20/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
+ deps:
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 9710addb440069a5ea884d90ed4c394a
- size: 1237939
- - path: truthseeker/plots/clean/condense/svc.csv
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
+ - path: params.yaml
hash: md5
- md5: a17c0cdb6a3fbfae5bd4fcfca1938a96
- size: 1257671
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_svc.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.id}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ study_name: ${dataset}_${model_name}_${stage}
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
+ params:
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
+ model_name: ${model_name}
+ direction: ${direction}
+ max_failure_rate: 1.0
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
outs:
- - path: truthseeker/plots/condensed_merged.csv
+ - path: truthseeker/logs/gzip_svc/20/symmetry_false
hash: md5
- md5: 738dc93bfff1b9c167949e722ee79665
- size: 3805499
- grid_search@300-ddos-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300
+ md5: cd321e0e8ed96e2dc914d3f061139e1b.dir
+ size: 1250531
+ nfiles: 513
+ - path: truthseeker/reports/gzip_svc/20/symmetry_false/train/
+ hash: md5
+ md5: 7fd5bb25a3688c3470e30aeee85674ff.dir
+ size: 546474
+ nfiles: 384
+ grid_search@100-ddos-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv
- files.directory=ddos files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv files.directory=ddos
+ files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -17306,30 +7566,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -17341,367 +7597,115 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
- outs:
- - path: ddos/logs/gzip_knn/300
- hash: md5
- md5: 1e533c118406ca2ffae2b0a3e11a5035.dir
- size: 1671182
- nfiles: 514
- - path: ddos/reports/gzip_knn/300/train/
- hash: md5
- md5: 000376454dd461f25065cdb093e78e7c.dir
- size: 1461265
- nfiles: 1403
- plot_condense@sms_spam:
- cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/condensed_merged.csv -c
- conf/condensed_plots.yaml
- deps:
- - path: sms_spam/plots/condensed_merged.csv
- hash: md5
- md5: 8f549743001ca622a6c7c8cbb2b3d17d
- size: 5114716
- params:
- conf/condensed_plots.yaml:
- line_plot:
- - file: sampling_method_vs_accuracy.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- y_scale: linear
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_train_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_predict_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: sms_spam/plots/sampling_method_vs_accuracy.pdf
- hash: md5
- md5: 8d3c7b03379f2f16bdb6de450083608b
- size: 40643
- - path: sms_spam/plots/sampling_method_vs_predict_time.pdf
- hash: md5
- md5: 095622e64533aedee66d72079f141c0d
- size: 53902
- - path: sms_spam/plots/sampling_method_vs_train_time.pdf
- hash: md5
- md5: da26bd3fc967c9925975f6c8ad189a88
- size: 50367
- plot_condense@ddos:
- cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/condensed_merged.csv -c
- conf/condensed_plots.yaml
- deps:
- - path: ddos/plots/condensed_merged.csv
- hash: md5
- md5: dc147a2e9c585b39c5e212a46ade70ac
- size: 9306964
- params:
- conf/condensed_plots.yaml:
- line_plot:
- - file: sampling_method_vs_accuracy.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- y_scale: linear
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_train_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_predict_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
outs:
- - path: ddos/plots/sampling_method_vs_accuracy.pdf
+ - path: ddos/logs/gzip_knn/100/symmetry_true
hash: md5
- md5: 09737e6b272979bf7fc879ece10d25e5
- size: 57907
- - path: ddos/plots/sampling_method_vs_predict_time.pdf
- hash: md5
- md5: 78e2e0111219f86d189dfb952d81cdba
- size: 78230
- - path: ddos/plots/sampling_method_vs_train_time.pdf
+ md5: ce684eab73c010891cc6eb844e066134.dir
+ size: 1190708
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/100/symmetry_true/train/
hash: md5
- md5: ab34ce0b71b6c0153525b0194178ecaf
- size: 64512
- plot_condense@kdd_nsl:
- cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/condensed_merged.csv -c
- conf/condensed_plots.yaml
+ md5: 60e9b4f5171f22fb8144383380218108.dir
+ size: 81468
+ nfiles: 91
+ grid_search@100-ddos-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
+ data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv files.directory=ddos
+ files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: kdd_nsl/plots/condensed_merged.csv
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 1ddcee7de7db0c1a7d4898de4a03d7b7
- size: 4543759
- params:
- conf/condensed_plots.yaml:
- line_plot:
- - file: sampling_method_vs_accuracy.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- y_scale: linear
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_train_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_predict_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- outs:
- - path: kdd_nsl/plots/sampling_method_vs_accuracy.pdf
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
hash: md5
- md5: 1c673220cd32e3f9bd2aa92516d0b20e
- size: 38546
- - path: kdd_nsl/plots/sampling_method_vs_predict_time.pdf
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_knn.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.num}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
+ params:
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
+ outs:
+ - path: ddos/logs/gzip_knn/100/symmetry_false
hash: md5
- md5: 4bcb086fcd47e05d2b79e30a12d15869
- size: 50187
- - path: kdd_nsl/plots/sampling_method_vs_train_time.pdf
+ md5: 307edd5cacb6d130cdca319d74e42152.dir
+ size: 1200449
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/100/symmetry_false/train/
hash: md5
- md5: 2b3e91d9b656ba35d06f8e97d1e8359d
- size: 45992
- grid_search@300-ddos-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300
+ md5: 9eb4c5ed862761d977cbec997e27a109.dir
+ size: 286576
+ nfiles: 321
+ grid_search@100-ddos-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_logistic/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/300/study.csv
- files.directory=ddos files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv
+ files.directory=ddos files.reports=reports/gzip_logistic/100/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_logistic --multirun
deps:
- path: conf/gzip_logistic.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_logistic.yaml:
hydra:
@@ -17721,31 +7725,27 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.fit_intercept: True,False
+model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -17759,36 +7759,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: ddos/logs/gzip_logistic/300
+ - path: ddos/logs/gzip_logistic/100/symmetry_true
hash: md5
- md5: ace39d7825de3ce5c0d678839c812ab6.dir
- size: 1765030
- nfiles: 514
- - path: ddos/reports/gzip_logistic/300/train/
+ md5: d0b4bd67c2297fcf7cd87b5bb49830ce.dir
+ size: 1236038
+ nfiles: 513
+ - path: ddos/reports/gzip_logistic/100/symmetry_true/train/
hash: md5
- md5: 9f23532033970310bd5915d4018de935.dir
- size: 1436932
- nfiles: 963
- grid_search@300-ddos-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300
- data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/300/study.csv
- files.directory=ddos files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 3f4bc5d4c66937cccc23ae865cd69762.dir
+ size: 636279
+ nfiles: 332
+ grid_search@100-ddos-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
+ data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv
+ files.directory=ddos files.reports=reports/gzip_logistic/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -17800,37 +7802,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
+ directions: ${direction}
+ metric_names: ${optimizers}
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -17844,147 +7842,37 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
- outs:
- - path: ddos/logs/gzip_svc/300
- hash: md5
- md5: 7681421b662e0a0690e9a1a6a4cf4b79.dir
- size: 1710386
- nfiles: 514
- - path: ddos/reports/gzip_svc/300/train/
- hash: md5
- md5: c872a806e708289c65e6856bc2a057bf.dir
- size: 1393355
- nfiles: 1045
- plot_condense@truthseeker:
- cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/condensed_merged.csv -c
- conf/condensed_plots.yaml
- deps:
- - path: truthseeker/plots/condensed_merged.csv
- hash: md5
- md5: 738dc93bfff1b9c167949e722ee79665
- size: 3805499
- params:
- conf/condensed_plots.yaml:
- line_plot:
- - file: sampling_method_vs_accuracy.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: accuracy
- ylabel: Accuracy
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- y_scale: linear
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_train_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: train_time
- ylabel: Training Time (s)
- y_scale: linear
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
- - file: sampling_method_vs_predict_time.pdf
- hue: model.init.sampling_method
- title:
- x: model.init.m
- xlabel: Percentage of Samples per Class
- y: predict_time
- ylabel: Prediction Time (s)
- y_scale: log
- hue_order:
- - random
- - svc
- - knn
- - sum
- - medoid
- - nearmiss
- - hardness
- errorbar: se
- err_style: bars
- xlim:
- - 0
- - 1
- legend:
- title: Sampling Method
- bbox_to_anchor:
- - 1.05
- - 0.5
- loc: center left
- prop:
- size: 14
outs:
- - path: truthseeker/plots/sampling_method_vs_accuracy.pdf
- hash: md5
- md5: 0d293f64173585cb19c88218a7327f83
- size: 18158
- - path: truthseeker/plots/sampling_method_vs_predict_time.pdf
+ - path: ddos/logs/gzip_logistic/100/symmetry_false
hash: md5
- md5: bb494d7b950451096bb639f3a9f1b4cb
- size: 45092
- - path: truthseeker/plots/sampling_method_vs_train_time.pdf
+ md5: 54987f50efd1f9833711c4bce8ad266b.dir
+ size: 1204334
+ nfiles: 513
+ - path: ddos/reports/gzip_logistic/100/symmetry_false/train/
hash: md5
- md5: 85a9eeb8f5aecc63f5634b12483941cf
- size: 39796
- grid_search@500-ddos-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500
- data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_logistic/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/500/study.csv
- files.directory=ddos files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 4237b3f9a08decdbf109a54fce741a4e.dir
+ size: 659696
+ nfiles: 306
+ grid_search@100-ddos-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
+ data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv files.directory=ddos
+ files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -17996,37 +7884,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18040,34 +7924,35 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: ddos/logs/gzip_logistic/500
- hash: md5
- md5: afb6463625f139e82a88976c24b93f16.dir
- size: 1791134
- nfiles: 514
- - path: ddos/reports/gzip_logistic/500/train/
- hash: md5
- md5: dbed10dfbc2747c79e14dcedcbce0661.dir
- size: 968208
- nfiles: 702
- grid_search@500-ddos-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500
+ - path: ddos/logs/gzip_svc/100/symmetry_true
+ hash: md5
+ md5: 8f54e554e59aa39da2cc6a545a2b2a84.dir
+ size: 1238692
+ nfiles: 513
+ - path: ddos/reports/gzip_svc/100/symmetry_true/train/
+ hash: md5
+ md5: 1d55a1ad04addb2611ea268d0d5c037c.dir
+ size: 552051
+ nfiles: 384
+ grid_search@100-ddos-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/500/study.csv
- files.directory=ddos files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv files.directory=ddos
+ files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_svc --multirun
deps:
- path: conf/gzip_svc.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_svc.yaml:
hydra:
@@ -18089,29 +7974,25 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.gamma: scale,auto
+model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18125,43 +8006,122 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: ddos/logs/gzip_svc/500
+ - path: ddos/logs/gzip_svc/100/symmetry_false
hash: md5
- md5: 319357234ff9123f09bb6603fe74866f.dir
- size: 1737584
- nfiles: 514
- - path: ddos/reports/gzip_svc/500/train/
+ md5: 20385e7fa159098729a46a9ec8ad3e2f.dir
+ size: 1240441
+ nfiles: 513
+ - path: ddos/reports/gzip_svc/100/symmetry_false/train/
hash: md5
- md5: 63ecb36bf4e16027b60bcd2892330829.dir
- size: 897567
- nfiles: 768
- grid_search@100-sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv
- files.directory=sms_spam files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 006736d48dc878223021e6c5cc721e21.dir
+ size: 552730
+ nfiles: 384
+ grid_search@100-kdd_nsl-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.id}
+ subdir: ${hydra.job.num}
+ callbacks:
+ study_dump:
+ _target_: database.OptunaStudyDumpCallback
+ storage: ${hydra.sweeper.storage}
+ study_name: ${hydra.sweeper.study_name}
+ directions: ${direction}
+ metric_names: ${optimizers}
+ output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
+ sweeper:
+ sampler:
+ _target_: optuna.samplers.TPESampler
+ consider_prior: true
+ seed: 123
+ prior_weight: 1.0
+ consider_magic_clip: true
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
+ multivariate: true
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
+ direction: ${direction}
+ storage: sqlite:///optuna.db
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
+ params:
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
+ model_name: ${model_name}
+ launcher:
+ _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
+ n_jobs: 8
+ prefer: processes
+ verbose: 1
+ timeout:
+ pre_dispatch: ${hydra.sweeper.n_jobs}
+ batch_size: auto
+ temp_folder: /tmp/deckard
+ max_nbytes: 100000
+ mmap_mode: r
+ outs:
+ - path: kdd_nsl/logs/gzip_knn/100/symmetry_true
+ hash: md5
+ md5: 549fe2e753e0bcf601fd788dec7aeb1e.dir
+ size: 1188776
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_knn/100/symmetry_true/train/
+ hash: md5
+ md5: c98bd9dce2feec89f7aec764a2c6d1e7.dir
+ size: 179210
+ nfiles: 190
+ grid_search@100-kdd_nsl-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
+ deps:
+ - path: conf/gzip_knn.yaml
+ hash: md5
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
+ - path: params.yaml
+ hash: md5
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
+ params:
+ conf/gzip_knn.yaml:
+ hydra:
+ run:
+ dir: ${dataset}/logs/${stage}/
+ sweep:
+ dir: ???
+ subdir: ${hydra.job.num}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -18173,33 +8133,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
+ direction: ${direction}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -18211,36 +8164,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: sms_spam/logs/gzip_logistic/100
+ - path: kdd_nsl/logs/gzip_knn/100/symmetry_false
hash: md5
- md5: d1120618c5a674fe50c5717e2d71d640.dir
- size: 1554813
- nfiles: 514
- - path: sms_spam/reports/gzip_logistic/100/train/
+ md5: 0a1d8131642b28351971a5294828d0d7.dir
+ size: 1127001
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_knn/100/symmetry_false/train/
hash: md5
- md5: 89f61791ac36513c4957057485a2e8e3.dir
- size: 553318
- nfiles: 357
- grid_search@100-sms_spam-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv
- files.directory=sms_spam files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: abf88a5a4a306ec284320cf3aa409135.dir
+ size: 155023
+ nfiles: 138
+ grid_search@100-kdd_nsl-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_kdd_nsl
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv
+ files.directory=kdd_nsl files.reports=reports/gzip_logistic/100/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -18252,37 +8207,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
+ directions: ${direction}
+ metric_names: ${optimizers}
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18296,42 +8247,44 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: sms_spam/logs/gzip_svc/100
+ - path: kdd_nsl/logs/gzip_logistic/100/symmetry_true
hash: md5
- md5: cb8e4936d6ee03af99fa775d8b4b956b.dir
- size: 1483653
- nfiles: 514
- - path: sms_spam/reports/gzip_svc/100/train/
+ md5: e57d0862551308c0ec0cabd6542a55e5.dir
+ size: 1239394
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_logistic/100/symmetry_true/train/
hash: md5
- md5: ae31535b48c489e3040a2836c43215a5.dir
- size: 543085
- nfiles: 384
- grid_search@300-kdd_nsl-gzip_knn:
+ md5: af7ccccb3c94a39edbbd239e9cc2a6ae.dir
+ size: 646824
+ nfiles: 327
+ grid_search@100-kdd_nsl-gzip_logistic-false:
cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/300/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_kdd_nsl
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv
+ files.directory=kdd_nsl files.reports=reports/gzip_logistic/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_knn.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_knn.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -18343,30 +8296,29 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
+ direction: ${direction}
+ max_failure_rate: 1.0
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -18378,37 +8330,37 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: kdd_nsl/logs/gzip_knn/300
+ - path: kdd_nsl/logs/gzip_logistic/100/symmetry_false
hash: md5
- md5: d3f58cbd5181a4f86ac660aba7173dfb.dir
- size: 1437824
- nfiles: 514
- - path: kdd_nsl/reports/gzip_knn/300/train/
+ md5: 3ee2c47866f4ce98afa41e1d10dc99c8.dir
+ size: 1285300
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_logistic/100/symmetry_false/train/
hash: md5
- md5: d5317915e16e54a5fb4c82963cc0b058.dir
- size: 825336
- nfiles: 612
- grid_search@300-kdd_nsl-gzip_logistic:
+ md5: c7034228ec933542633506b363bdd18a.dir
+ size: 586323
+ nfiles: 367
+ grid_search@100-kdd_nsl-gzip_svc-true:
cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/300
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/300/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -18420,37 +8372,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18464,34 +8412,35 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: kdd_nsl/logs/gzip_logistic/300
+ - path: kdd_nsl/logs/gzip_svc/100/symmetry_true
hash: md5
- md5: 6793362a9053b6f28647bb49875ebcf3.dir
- size: 1634660
- nfiles: 514
- - path: kdd_nsl/reports/gzip_logistic/300/train/
+ md5: 66d83844ef05adb0a121fce7b252b683.dir
+ size: 1250230
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_svc/100/symmetry_true/train/
hash: md5
- md5: f2a46e55c8597a4d4082202f69186083.dir
- size: 945424
- nfiles: 723
- grid_search@300-kdd_nsl-gzip_svc:
+ md5: 9de34dd6d2fb5ad4ebb92c7dfcf05629.dir
+ size: 555703
+ nfiles: 384
+ grid_search@100-kdd_nsl-gzip_svc-false:
cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/300/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv files.directory=kdd_nsl
+ files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_svc --multirun
deps:
- path: conf/gzip_svc.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_svc.yaml:
hydra:
@@ -18513,29 +8462,25 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.gamma: scale,auto
+model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18549,34 +8494,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: kdd_nsl/logs/gzip_svc/300
+ - path: kdd_nsl/logs/gzip_svc/100/symmetry_false
hash: md5
- md5: 1bd3b191acf0f78e361e1bc3cb6df928.dir
- size: 1584389
- nfiles: 514
- - path: kdd_nsl/reports/gzip_svc/300/train/
+ md5: 977a69c4aa921c8559e687b1ca7fb3b6.dir
+ size: 1244242
+ nfiles: 513
+ - path: kdd_nsl/reports/gzip_svc/100/symmetry_false/train/
hash: md5
- md5: b6e64c8b751bf3a140aa9871f341a173.dir
- size: 899234
- nfiles: 765
- grid_search@300-sms_spam-gzip_knn:
+ md5: 4dafa970272be8aa5c954ef2c8883ce1.dir
+ size: 555022
+ nfiles: 384
+ grid_search@100-sms_spam-gzip_knn-true:
cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/300/study.csv
- files.directory=sms_spam files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -18596,30 +8543,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -18631,43 +8574,44 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: sms_spam/logs/gzip_knn/300
+ - path: sms_spam/logs/gzip_knn/100/symmetry_true
hash: md5
- md5: 09019492218a189aabe0601cb4c3f3a3.dir
- size: 1460894
- nfiles: 514
- - path: sms_spam/reports/gzip_knn/300/train/
+ md5: 78ca4529619f53661b14a5d0c4cb99bd.dir
+ size: 1086010
+ nfiles: 513
+ - path: sms_spam/reports/gzip_knn/100/symmetry_true/train/
hash: md5
- md5: 3aa09498a167a50051ee2fdf3e46d62d.dir
- size: 364240
- nfiles: 349
- grid_search@300-sms_spam-gzip_logistic:
+ md5: 688b101d8f5ff7b2e466c0e9492e3d6a.dir
+ size: 107355
+ nfiles: 118
+ grid_search@100-sms_spam-gzip_knn-false:
cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/300
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/300/study.csv
- files.directory=sms_spam files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.id}
+ subdir: ${hydra.job.num}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -18679,33 +8623,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
+ direction: ${direction}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -18717,36 +8654,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: sms_spam/logs/gzip_logistic/300
+ - path: sms_spam/logs/gzip_knn/100/symmetry_false
hash: md5
- md5: 627574a996abf0037be2b9d798c0a1f6.dir
- size: 1593011
- nfiles: 514
- - path: sms_spam/reports/gzip_logistic/300/train/
+ md5: b77d9d0576d484d42fa24401a1d81509.dir
+ size: 1142222
+ nfiles: 513
+ - path: sms_spam/reports/gzip_knn/100/symmetry_false/train/
hash: md5
- md5: 886edc50f38dc580603074bf8dc46835.dir
- size: 553839
- nfiles: 363
- grid_search@300-sms_spam-gzip_svc:
+ md5: 663f10d7b2a3647caecaa978b7b7d983.dir
+ size: 119667
+ nfiles: 117
+ grid_search@100-sms_spam-gzip_logistic-true:
cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/300/study.csv
- files.directory=sms_spam files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_sms_spam
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_logistic/100/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -18758,37 +8697,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
+ directions: ${direction}
+ metric_names: ${optimizers}
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18802,42 +8737,44 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: sms_spam/logs/gzip_svc/300
+ - path: sms_spam/logs/gzip_logistic/100/symmetry_true
hash: md5
- md5: 7d9d939af4228ad75b78ee5c347a984a.dir
- size: 1513139
- nfiles: 514
- - path: sms_spam/reports/gzip_svc/300/train/
+ md5: 517eb16a845fa795e775ef9a68e0a0c6.dir
+ size: 1234485
+ nfiles: 513
+ - path: sms_spam/reports/gzip_logistic/100/symmetry_true/train/
hash: md5
- md5: cb8713e4f13494c3c1ab3c93c238d2d7.dir
- size: 544369
- nfiles: 384
- grid_search@300-truthseeker-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/300/study.csv
- files.directory=truthseeker files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
+ md5: 80878d8c169e37e8110005c63a1ee5d0.dir
+ size: 635861
+ nfiles: 326
+ grid_search@100-sms_spam-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_sms_spam
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_logistic/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_knn.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_knn.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.num}
+ subdir: ${hydra.job.id}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -18849,30 +8786,29 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- direction: ${direction}
- storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
- max_failure_rate: 1.0
+ storage: sqlite:///optuna.db
+ n_trials: 128
+ n_jobs: 8
params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
+ direction: ${direction}
+ max_failure_rate: 1.0
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -18884,37 +8820,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: truthseeker/logs/gzip_knn/300
+ - path: sms_spam/logs/gzip_logistic/100/symmetry_false
hash: md5
- md5: 7fc2fb64903d90052db980e395a73a1b.dir
- size: 1418937
- nfiles: 514
- - path: truthseeker/reports/gzip_knn/300/train/
+ md5: 394ed9398208455dae29046d35774913.dir
+ size: 1229002
+ nfiles: 513
+ - path: sms_spam/reports/gzip_logistic/100/symmetry_false/train/
hash: md5
- md5: 1b7d0b73ddb24fa30f48675625cad64c.dir
- size: 384561
- nfiles: 332
- grid_search@300-truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/300
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/300/study.csv
- files.directory=truthseeker files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 1bd2509e914115c6a834f630872fe406.dir
+ size: 628941
+ nfiles: 323
+ grid_search@100-sms_spam-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -18926,37 +8863,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -18970,34 +8903,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: truthseeker/logs/gzip_logistic/300
+ - path: sms_spam/logs/gzip_svc/100/symmetry_true
hash: md5
- md5: 121b624ea70d27aba89bd5448c35580f.dir
- size: 1564349
- nfiles: 514
- - path: truthseeker/reports/gzip_logistic/300/train/
+ md5: c0931c4a2af0f0b39b4fb699e5ff8850.dir
+ size: 1246641
+ nfiles: 513
+ - path: sms_spam/reports/gzip_svc/100/symmetry_true/train/
hash: md5
- md5: 7dfeff37b85b221b60c7bad442f21658.dir
- size: 557318
- nfiles: 367
- grid_search@300-truthseeker-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/300/study.csv
- files.directory=truthseeker files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1
+ md5: 903ac9307687b483ee7f60f5c5a9e068.dir
+ size: 543384
+ nfiles: 384
+ grid_search@100-sms_spam-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv
+ files.directory=sms_spam files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_svc --multirun
deps:
- path: conf/gzip_svc.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_svc.yaml:
hydra:
@@ -19019,29 +8954,25 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.gamma: scale,auto
+model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -19055,34 +8986,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: truthseeker/logs/gzip_svc/300
+ - path: sms_spam/logs/gzip_svc/100/symmetry_false
hash: md5
- md5: c1b03e3fa37ca812864d04d3a38216db.dir
- size: 1536045
- nfiles: 514
- - path: truthseeker/reports/gzip_svc/300/train/
+ md5: f37630902004d80cb73ff229905ca426.dir
+ size: 1247648
+ nfiles: 513
+ - path: sms_spam/reports/gzip_svc/100/symmetry_false/train/
hash: md5
- md5: 2cf3648372291b72f9b16020c5c3ad4e.dir
- size: 548358
+ md5: 58dc217409a236b747a999da2ef4cee1.dir
+ size: 543731
nfiles: 384
- grid_search@500-ddos-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500
- data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=ddos/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/500/study.csv
- files.directory=ddos files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1
+ grid_search@100-truthseeker-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_knn/100/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -19102,30 +9035,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -19137,34 +9066,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: ddos/logs/gzip_knn/500
+ - path: truthseeker/logs/gzip_knn/100/symmetry_true
hash: md5
- md5: ebb76a3ffe046f5763072644ec826dd9.dir
- size: 1693130
- nfiles: 514
- - path: ddos/reports/gzip_knn/500/train/
+ md5: 3bb5017fdd0b61fd7b5be594c4dd0b9c.dir
+ size: 1193938
+ nfiles: 513
+ - path: truthseeker/reports/gzip_knn/100/symmetry_true/train/
hash: md5
- md5: 00682fbb7c897d179ed788f09be3b1e9.dir
- size: 732559
- nfiles: 763
- grid_search@500-kdd_nsl-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/500/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1
+ md5: c0ef5fa56bc9c65e6b6abe943f424be6.dir
+ size: 227250
+ nfiles: 244
+ grid_search@100-truthseeker-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_knn/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 2d0f54d62dcdc05d21ea1730899de0bb
+ size: 1827
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -19184,30 +9115,26 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -19219,35 +9146,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: kdd_nsl/logs/gzip_knn/500
+ - path: truthseeker/logs/gzip_knn/100/symmetry_false
hash: md5
- md5: f1d5a2b6b59bc61a8c8d9c52d3a2ad11.dir
- size: 1496906
- nfiles: 514
- - path: kdd_nsl/reports/gzip_knn/500/train/
+ md5: 77709b1d2f5973a004742328fa7ccf46.dir
+ size: 1173316
+ nfiles: 513
+ - path: truthseeker/reports/gzip_knn/100/symmetry_false/train/
hash: md5
- md5: bffa17c78573257f1d85dccf5d93fade.dir
- size: 388686
- nfiles: 335
- grid_search@500-kdd_nsl-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/500
- hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/500/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1
+ md5: 0a3609651300c7e4d773fdce2af08984.dir
+ size: 171434
+ nfiles: 160
+ grid_search@100-truthseeker-gzip_logistic-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_truthseeker
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_logistic/100/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_logistic --multirun
deps:
- path: conf/gzip_logistic.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_logistic.yaml:
hydra:
@@ -19267,31 +9195,27 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.fit_intercept: True,False
+model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -19305,36 +9229,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: kdd_nsl/logs/gzip_logistic/500
+ - path: truthseeker/logs/gzip_logistic/100/symmetry_true
hash: md5
- md5: 44795a3a64e10088623faf15b87a4548.dir
- size: 1666384
- nfiles: 514
- - path: kdd_nsl/reports/gzip_logistic/500/train/
+ md5: d6d4b0b157b08346ad1b518d2edfe1f8.dir
+ size: 1243931
+ nfiles: 513
+ - path: truthseeker/reports/gzip_logistic/100/symmetry_true/train/
hash: md5
- md5: 607cd0515dec2502b0bd11b6480b5d7b.dir
- size: 565896
- nfiles: 357
- grid_search@500-kdd_nsl-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=kdd_nsl/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/500/study.csv
- files.directory=kdd_nsl files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
+ md5: 8f94e7db8385fb9f3973eb19b328397a.dir
+ size: 639777
+ nfiles: 326
+ grid_search@100-truthseeker-gzip_logistic-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic
+ model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_truthseeker
+ hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_logistic/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_logistic --multirun
deps:
- - path: conf/gzip_svc.yaml
+ - path: conf/gzip_logistic.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: da7adfd9b59783b6cd34f750dfcfb1b5
+ size: 1993
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_svc.yaml:
+ conf/gzip_logistic.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -19346,121 +9272,35 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
+ directions: ${direction}
+ metric_names: ${optimizers}
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.solver: saga
+ +model.init.penalty: l2,l1
+ +model.init.tol: tag(log, interval(1e-5, 1e-1))
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.fit_intercept: True,False
+ +model.init.class_weight: balanced,None
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_svc
- outs:
- - path: kdd_nsl/logs/gzip_svc/500
- hash: md5
- md5: 1ed2e3d83e888471981684eaaa3f3b8e.dir
- size: 1613038
- nfiles: 514
- - path: kdd_nsl/reports/gzip_svc/500/train/
- hash: md5
- md5: c53dae7497a8f55965cc708c28280f4e.dir
- size: 555797
- nfiles: 384
- grid_search@500-sms_spam-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/500/study.csv
- files.directory=sms_spam files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_knn --multirun
- deps:
- - path: conf/gzip_knn.yaml
- hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_knn.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.num}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
- seed: 123
- consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
direction: ${direction}
- storage: sqlite:///optuna.db
- study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
max_failure_rate: 1.0
- params:
- model.init.k: 1,3,5,7,11
- +model.init.weights: uniform,distance
- +model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -19472,37 +9312,38 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: sms_spam/logs/gzip_knn/500
+ - path: truthseeker/logs/gzip_logistic/100/symmetry_false
hash: md5
- md5: 0e5c9c1b5970ef63e76b3adcbb1d9bde.dir
- size: 1465483
- nfiles: 514
- - path: sms_spam/reports/gzip_knn/500/train/
+ md5: e00ee47514e58ea5f4d39063d194ca52.dir
+ size: 1288351
+ nfiles: 513
+ - path: truthseeker/reports/gzip_logistic/100/symmetry_false/train/
hash: md5
- md5: dd14847ddf87817f4410aea70b8fdce3.dir
- size: 378991
- nfiles: 331
- grid_search@500-sms_spam-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/500
- hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/500/study.csv
- files.directory=sms_spam files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 6eaa1b0799b99345f36c3649419ed12f.dir
+ size: 581607
+ nfiles: 364
+ grid_search@100-truthseeker-gzip_svc-true:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/100/symmetry_true
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_svc/100/symmetry_true
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ ++raise_exception=True --config-name gzip_svc --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_svc.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_svc.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
@@ -19514,37 +9355,33 @@ stages:
_target_: database.OptunaStudyDumpCallback
storage: ${hydra.sweeper.storage}
study_name: ${hydra.sweeper.study_name}
- directions: ${direction}
- metric_names: ${optimizers}
+ directions:
+ - maximize
+ metric_names:
+ - accuracy
output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
+ n_trials: 128
+ n_jobs: 8
params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
+ +model.init.kernel: rbf,precomputed
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+ +model.init.gamma: scale,auto
+ +model.init.class_weight: balanced,null
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -19558,34 +9395,36 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_logistic
outs:
- - path: sms_spam/logs/gzip_logistic/500
+ - path: truthseeker/logs/gzip_svc/100/symmetry_true
hash: md5
- md5: 6e6d0761de2d778fbdbebd1d547f04a1.dir
- size: 1619183
- nfiles: 514
- - path: sms_spam/reports/gzip_logistic/500/train/
+ md5: 4d85a297bae6c4437d8775268b8f09aa.dir
+ size: 1252991
+ nfiles: 513
+ - path: truthseeker/reports/gzip_svc/100/symmetry_true/train/
hash: md5
- md5: fb78d7f4f526194a09b6561a121f734e.dir
- size: 553072
- nfiles: 361
- grid_search@500-sms_spam-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=sms_spam/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/500/study.csv
- files.directory=sms_spam files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1
+ md5: e5dbcf02229d9973d0d948ab7291138c.dir
+ size: 546664
+ nfiles: 384
+ grid_search@100-truthseeker-gzip_svc-false:
+ cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
+ data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/100/symmetry_false
+ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv
+ files.directory=truthseeker files.reports=reports/gzip_svc/100/symmetry_false
+ hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True --config-name gzip_svc --multirun
deps:
- path: conf/gzip_svc.yaml
hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
+ md5: ef6089c75166b6acb57ce97a89157ad9
+ size: 1905
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_svc.yaml:
hydra:
@@ -19607,29 +9446,25 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
+ consider_endpoints: true
+ n_startup_trials: 256
+ n_ei_candidates: 32
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
study_name: ${dataset}_${model_name}_${stage}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ n_trials: 128
+ n_jobs: 8
params:
+model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
+ +model.init.C: tag(log, interval(1e-3, 1e3))
+model.init.gamma: scale,auto
+model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
direction: ${direction}
max_failure_rate: 1.0
launcher:
@@ -19643,34 +9478,34 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: sms_spam/logs/gzip_svc/500
+ - path: truthseeker/logs/gzip_svc/100/symmetry_false
hash: md5
- md5: 4b37a4947b8a27e8b050b76a2252f6d2.dir
- size: 1542505
- nfiles: 514
- - path: sms_spam/reports/gzip_svc/500/train/
+ md5: b33c39d320d25d5bfbd81006713e3d62.dir
+ size: 1254591
+ nfiles: 513
+ - path: truthseeker/reports/gzip_svc/100/symmetry_false/train/
hash: md5
- md5: adfaa61acf833b9b2d823fd944876030.dir
- size: 543664
+ md5: 13ac657603b4c71f4a17d78cbdc69083.dir
+ size: 547239
nfiles: 384
- grid_search@500-truthseeker-gzip_knn:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/500/study.csv
- files.directory=truthseeker files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1
+ grid_search@300-ddos-gzip_knn-true:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300
+ data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/300/symmetry_true
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv files.directory=ddos
+ files.reports=reports/gzip_knn/300/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
++raise_exception=True --config-name gzip_knn --multirun
deps:
- path: conf/gzip_knn.yaml
hash: md5
- md5: a58015cd6f327e171842b045a2524bfd
- size: 2062
+ md5: 187b2fd2a0a70b8980acfd256687f05a
+ size: 1928
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
conf/gzip_knn.yaml:
hydra:
@@ -19690,11 +9525,11 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
+ seed: 123
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
+ consider_endpoints: true
n_startup_trials: 10
n_ei_candidates: 24
multivariate: true
@@ -19702,18 +9537,15 @@ stages:
direction: ${direction}
storage: sqlite:///optuna.db
study_name: ${dataset}_${model_name}_${stage}
- n_trials: 2
- n_jobs: 2
+ n_trials: 128
+ n_jobs: 8
max_failure_rate: 1.0
params:
model.init.k: 1,3,5,7,11
+model.init.weights: uniform,distance
+model.init.algorithm: brute
- model.init.symmetric: True,False
- ++model.init.precompute: true
model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- ++data.sample.random_state: int(interval(1, 10000))
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -19725,43 +9557,42 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_knn
outs:
- - path: truthseeker/logs/gzip_knn/500
+ - path: ddos/logs/gzip_knn/300/symmetry_true
hash: md5
- md5: 8f89bb6eee2faa7d319f0e667a455558.dir
- size: 1449788
- nfiles: 514
- - path: truthseeker/reports/gzip_knn/500/train/
+ md5: d23dbd6a384157d616bebeeb6cf41a27.dir
+ size: 1175564
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/300/symmetry_true/train/
hash: md5
- md5: 22ad9cc6a9f1fc454ff08e23e1194b6a.dir
- size: 382020
- nfiles: 333
- grid_search@500-truthseeker-gzip_logistic:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic
- model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker
- hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/500
- hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/500/study.csv
- files.directory=truthseeker files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_logistic --multirun
+ md5: 4c887424c72eed237277e641dfcd47e2.dir
+ size: 191347
+ nfiles: 205
+ grid_search@300-ddos-gzip_knn-false:
+ cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300
+ data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128
+ hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/300/symmetry_false
+ hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv files.directory=ddos
+ files.reports=reports/gzip_knn/300/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ ++raise_exception=True --config-name gzip_knn --multirun
deps:
- - path: conf/gzip_logistic.yaml
+ - path: conf/gzip_knn.yaml
hash: md5
- md5: 847d4d804fff0b6f2533f90820eebd04
- size: 2205
+ md5: 187b2fd2a0a70b8980acfd256687f05a
+ size: 1928
- path: params.yaml
hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
+ md5: 486532089f9aed37612260a1f0a2bead
+ size: 1469
params:
- conf/gzip_logistic.yaml:
+ conf/gzip_knn.yaml:
hydra:
run:
dir: ${dataset}/logs/${stage}/
sweep:
dir: ???
- subdir: ${hydra.job.id}
+ subdir: ${hydra.job.num}
callbacks:
study_dump:
_target_: database.OptunaStudyDumpCallback
@@ -19773,118 +9604,27 @@ stages:
sweeper:
sampler:
_target_: optuna.samplers.TPESampler
- seed: 123
consider_prior: true
- prior_weight: 1.0
- consider_magic_clip: true
- consider_endpoints: false
- n_startup_trials: 10
- n_ei_candidates: 24
- multivariate: true
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
- storage: sqlite:///optuna.db
- n_jobs: 1
- n_trials: 1
- params:
- +model.init.solver: saga
- +model.init.penalty: l2,l1,l2,none
- +model.init.tol: 1e-4,1e-3,1e-2
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.fit_intercept: True,False
- +model.init.class_weight: balanced,None
- model.init.symmetric: True,False
- ++model.init.precompute: true
- model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
- model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
- launcher:
- _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
- n_jobs: 8
- prefer: processes
- verbose: 1
- timeout:
- pre_dispatch: ${hydra.sweeper.n_jobs}
- batch_size: auto
- temp_folder: /tmp/deckard
- max_nbytes: 100000
- mmap_mode: r
- model_name: gzip_logistic
- outs:
- - path: truthseeker/logs/gzip_logistic/500
- hash: md5
- md5: 536a09eb3f82d03737e3cec6aafdbac8.dir
- size: 1605851
- nfiles: 514
- - path: truthseeker/reports/gzip_logistic/500/train/
- hash: md5
- md5: 4560cd0abd0609eebe34c6f578d77f2d.dir
- size: 556183
- nfiles: 375
- grid_search@500-truthseeker-gzip_svc:
- cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker
- data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null
- hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8
- hydra.sweep.dir=truthseeker/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/500/study.csv
- files.directory=truthseeker files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1
- ++raise_exception=True --config-name gzip_svc --multirun
- deps:
- - path: conf/gzip_svc.yaml
- hash: md5
- md5: 957922cb6993eb99866232d944a4a106
- size: 2131
- - path: params.yaml
- hash: md5
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468
- size: 1435
- params:
- conf/gzip_svc.yaml:
- hydra:
- run:
- dir: ${dataset}/logs/${stage}/
- sweep:
- dir: ???
- subdir: ${hydra.job.id}
- callbacks:
- study_dump:
- _target_: database.OptunaStudyDumpCallback
- storage: ${hydra.sweeper.storage}
- study_name: ${hydra.sweeper.study_name}
- directions:
- - maximize
- metric_names:
- - accuracy
- output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv
- sweeper:
- sampler:
- _target_: optuna.samplers.TPESampler
seed: 123
- consider_prior: true
prior_weight: 1.0
consider_magic_clip: true
- consider_endpoints: false
+ consider_endpoints: true
n_startup_trials: 10
n_ei_candidates: 24
multivariate: true
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
- study_name: ${dataset}_${model_name}_${stage}
+ direction: ${direction}
storage: sqlite:///optuna.db
- n_jobs: 2
- n_trials: 2
+ study_name: ${dataset}_${model_name}_${stage}
+ n_trials: 128
+ n_jobs: 8
+ max_failure_rate: 1.0
params:
- +model.init.kernel: rbf,precomputed
- +model.init.C: 1e-2,1e-1,1e0,1e1,1e2
- +model.init.gamma: scale,auto
- +model.init.class_weight: balanced,null
- model.init.symmetric: True,False
- ++model.init.precompute: true
+ model.init.k: 1,3,5,7,11
+ +model.init.weights: uniform,distance
+ +model.init.algorithm: brute
model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
model_name: ${model_name}
- data.sample.random_state: int(interval(1, 10000))
- direction: ${direction}
- max_failure_rate: 1.0
launcher:
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher
n_jobs: 8
@@ -19896,15 +9636,14 @@ stages:
temp_folder: /tmp/deckard
max_nbytes: 100000
mmap_mode: r
- model_name: gzip_svc
outs:
- - path: truthseeker/logs/gzip_svc/500
+ - path: ddos/logs/gzip_knn/300/symmetry_false
hash: md5
- md5: 10808502e0c1c7d780ea6178ae53c19c.dir
- size: 1568093
- nfiles: 514
- - path: truthseeker/reports/gzip_svc/500/train/
+ md5: 8ed5c114922082086fcec773797c4983.dir
+ size: 1159774
+ nfiles: 513
+ - path: ddos/reports/gzip_knn/300/symmetry_false/train/
hash: md5
- md5: 1fb9105254065d6d93e9647e12d650b2.dir
- size: 547905
- nfiles: 384
+ md5: 4122b0aa41babba1d8a8e141206a1c1a.dir
+ size: 167245
+ nfiles: 167
diff --git a/examples/gzip/dvc.yaml b/examples/gzip/dvc.yaml
index b7d4c8d6..367523d2 100644
--- a/examples/gzip/dvc.yaml
+++ b/examples/gzip/dvc.yaml
@@ -71,112 +71,12 @@ stages:
- params.yaml
- raw_data/ # Raw data
##############################################################################
- test_each_dataset:
- matrix:
- dataset : [ddos, truthseeker, sms_spam, kdd_nsl]
- model_name : [gzip_knn, gzip_svc, gzip_logistic]
- cmd : >-
- python -m deckard.layers.optimise
- stage=train
- files.name=${item.model_name}
- data.sample.train_size=100
- files.directory=${item.dataset}
- data=${item.dataset}
- dataset=${item.dataset}
- model_name=${item.model_name}
- model=${item.model_name}
- hydra.run.dir=${item.dataset}/logs/train/${item.model_name}
- ++raise_exception=True
- deps:
- - params.yaml
- - ${files.directory}/${files.reports}/train/default/${files.score_dict_file}
- outs:
- - ${item.dataset}/${files.reports}/train/${item.model_name}/${files.score_dict_file}
- - ${item.dataset}/logs/train/${item.model_name}
- params:
- - data
- - model
- - scorers
- - files
- - dataset
- - model_name
- - device_id
- ##############################################################################
- test_each_metric:
- matrix:
- metric: [gzip, zstd, pkl, bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio]
- model : [gzip_knn,] # gzip_svc, gzip_logistic
- dataset : [kdd_nsl] #truthseeker, sms_spam, ddos
- train_size: [20] #100, 1000, 10000
- cmd : >-
- python -m deckard.layers.optimise
- stage=test_each_metric
- files.name=${item.model}/${item.metric}/${item.train_size}
- files.directory=${item.dataset}
- data=${item.dataset}
- data.sample.train_size=${item.train_size}
- dataset=${item.dataset}
- model=${item.model}
- model_name=${model_name}
- model.init.metric=${item.metric}
- model.init.m=-1
- hydra.run.dir=${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size}
- ++raise_exception=True
- deps:
- - params.yaml
- - ${files.directory}/${files.reports}/train/default/${files.score_dict_file}
- outs:
- - ${item.dataset}/${files.reports}/test_each_metric/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file}
- - ${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size}
- params:
- - data
- - model
- - scorers
- - files
- - dataset
- - model_name
- - device_id
- # ##############################################################################
- test_each_model:
- matrix:
- metric: [gzip] #, zstd, pkl, bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio
- model : [gzip_knn, gzip_svc, gzip_logistic]
- dataset : [kdd_nsl] #truthseeker, sms_spam, ddos
- train_size: [20] #100, 1000, 10000
- cmd : >-
- python -m deckard.layers.optimise
- stage=test_each_model
- files.name=${item.model}/${item.metric}/${item.train_size}
- files.directory=${item.dataset}
- data=${item.dataset}
- data.sample.train_size=${item.train_size}
- dataset=${item.dataset}
- model=${item.model}
- model_name=${model_name}
- model.init.metric=${item.metric}
- model.init.m=-1
- hydra.run.dir=${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size}
- ++raise_exception=True
- deps:
- - params.yaml
- - ${files.directory}/${files.reports}/train/default/${files.score_dict_file}
- outs:
- - ${item.dataset}/${files.reports}/test_each_model/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file}
- - ${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size}
- params:
- - data
- - model
- - scorers
- - files
- - dataset
- - model_name
- - device_id
- ##############################################################################
grid_search:
matrix:
train_size: [20, 100, 300, 500] #
dataset : [ddos, kdd_nsl, sms_spam, truthseeker] #
configs: [gzip_knn, gzip_logistic, gzip_svc]
+ symmetric : [True, False]
cmd: >-
python -m deckard.layers.optimise
stage=train
@@ -186,14 +86,17 @@ stages:
data.sample.test_size=100
model_name=${item.configs}
model.init.distance_matrix=null
+ model.init.symmetric=${item.symmetric}
hydra.sweeper.study_name=${item.configs}_${item.dataset}
hydra.sweeper.n_trials=128
hydra.sweeper.n_jobs=8
- hydra.sweep.dir=${item.dataset}/logs/${item.configs}/${item.train_size}
+ hydra.sweep.dir=${item.dataset}/logs/${item.configs}/${item.train_size}/symmetry_${item.symmetric}
hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.configs}/${item.train_size}/study.csv
files.directory=${item.dataset}
- files.reports=${files.reports}/${item.configs}/${item.train_size}
+ files.reports=${files.reports}/${item.configs}/${item.train_size}/symmetry_${item.symmetric}
hydra.launcher.n_jobs=-1
+ ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10
+ model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio
++raise_exception=True
--config-name ${item.configs}
--multirun
@@ -201,15 +104,17 @@ stages:
- params.yaml
- conf/${item.configs}.yaml
outs:
- - ${item.dataset}/logs/${item.configs}/${item.train_size}
- - ${item.dataset}/${files.reports}/${item.configs}/${item.train_size}/train/:
+ - ${item.dataset}/logs/${item.configs}/${item.train_size}/symmetry_${item.symmetric}:
+ cache: true
+ persist: true
+ push: true
+ - ${item.dataset}/${files.reports}/${item.configs}/${item.train_size}/symmetry_${item.symmetric}/train/:
cache: true
persist: true
push: true
params:
- conf/${item.configs}.yaml:
- hydra
- - model_name
##############################################################################
# find_best_model: # This isn't actually used in later steps, but it's handy to have these configs ready for a line search instead of a massive grid search
# matrix:
@@ -221,43 +126,12 @@ stages:
# python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name ${item.model}_${item.dataset} --config_subdir model --params_file best_${item.model}_${item.dataset} --default_config ${item.model}
# outs:
# - conf/model/best_${item.model}_${item.dataset}.yaml
- #############################################################################
- test_each_method:
- matrix:
- dataset : [ddos] # kdd_nsl, truthseeker, sms_spam,
- method: [medoid, sum, svc, hardness, nearmiss,random,knn]
- cmd : >-
- python -m deckard.layers.optimise
- stage=train
- +model.init.sampling_method=${item.method}
- model.init.m=3
- data.sample.train_size=100
- files.name=${item.method}
- files.directory=${item.dataset}
- data=${item.dataset}
- dataset=${item.dataset}
- model_name=${item.method}
- hydra.run.dir=${item.dataset}/logs/method/${item.method}
- ++raise_exception=True
- deps:
- - params.yaml
- - ${files.directory}/${files.reports}/train/default/${files.score_dict_file}
- outs:
- - ${item.dataset}/${files.reports}/train/${item.method}/${files.score_dict_file}
- - ${item.dataset}/logs/method/${item.method}
- params:
- - data
- - model
- - scorers
- - files
- - dataset
- - model_name
- - device_id
##############################################################################
condense:
matrix:
dataset : [ddos, kdd_nsl, truthseeker, sms_spam,] # kdd_nsl, truthseeker, sms_spam,
model_name : [knn, svc, logistic]
+ ratio : [1, .9, .8, .7, .6, .5, .4, .3, .2, .1]
deps:
- params.yaml
- conf/condense_${item.model_name}.yaml
@@ -270,19 +144,28 @@ stages:
data.sample.test_size=100
model_name=condensed_${item.model_name}
model=gzip_${item.model_name}
+ ++model.init.m=${item.ratio}
+ ++model.init.distance_matrix=${item.dataset}/models/${item.model_name}/${item.ratio}/distance_matrix.npz
files.directory=${item.dataset}
- files.reports=${files.reports}/condense/${item.model_name}/
+ files.reports=${files.reports}/condense/${item.model_name}/${item.ratio}/
hydra.sweeper.study_name=condense_${item.model_name}_${item.dataset}
- hydra.sweeper.n_trials=1024
+ hydra.sweeper.n_trials=128
hydra.sweeper.n_jobs=8
- hydra.sweep.dir=${item.dataset}/logs/condense/${item.model_name}/
+ hydra.sweep.dir=${item.dataset}/logs/condense/${item.model_name}/${item.ratio}/
hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.model_name}/study.csv
hydra.launcher.n_jobs=-1
--config-name condense_${item.model_name}
--multirun
outs:
- - ${item.dataset}/logs/condense/${item.model_name}/
- - ${item.dataset}/${files.reports}/condense/${item.model_name}/:
+ - ${item.dataset}/logs/condense/${item.model_name}/${item.ratio}:
+ cache: true
+ persist: true
+ push: true
+ - ${item.dataset}/${files.reports}/condense/${item.model_name}/${item.ratio}:
+ cache: true
+ persist: true
+ push: true
+ - ${item.dataset}/models/${item.model_name}/${item.ratio}/:
cache: true
persist: true
push: true
@@ -291,7 +174,7 @@ stages:
- hydra
compile:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic]
deps:
- ${item.dataset}/${files.reports}/${item.stage}/
@@ -304,7 +187,7 @@ stages:
##############################################################################
clean:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic]
deps:
- ${item.dataset}/${files.reports}/${item.stage}.csv
@@ -318,10 +201,12 @@ stages:
params:
- conf/clean.yaml:
- replace
+ - drop_values
+ - replace_cols
##############################################################################
merge:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
deps:
- ${item.dataset}/plots/clean/gzip_knn.csv
- ${item.dataset}/plots/clean/gzip_logistic.csv
@@ -338,7 +223,7 @@ stages:
##############################################################################
merge_condense:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
deps:
- ${item.dataset}/plots/clean/condense/knn.csv
- ${item.dataset}/plots/clean/condense/logistic.csv
@@ -355,7 +240,7 @@ stages:
##############################################################################
plot:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
cmd: >-
python -m deckard.layers.plots
--path ${item.dataset}/plots/
@@ -363,6 +248,7 @@ stages:
-c conf/plots.yaml
deps:
- ${item.dataset}/plots/merged.csv
+ - conf/plots.yaml
plots:
- ${item.dataset}/plots/${line_plot[0].file}
- ${item.dataset}/plots/${line_plot[1].file}
@@ -379,7 +265,7 @@ stages:
##############################################################################
plot_condense:
matrix:
- dataset : [kdd_nsl, sms_spam, ddos]
+ dataset : [kdd_nsl, sms_spam, ddos, truthseeker]
cmd: >-
python -m deckard.layers.plots
--path ${item.dataset}/plots/
@@ -387,22 +273,72 @@ stages:
-c conf/condensed_plots.yaml
deps:
- ${item.dataset}/plots/condensed_merged.csv
+ - conf/condensed_plots.yaml
plots:
- - ${item.dataset}/plots/sampling_method_vs_accuracy.pdf
- - ${item.dataset}/plots/sampling_method_vs_train_time.pdf
- - ${item.dataset}/plots/sampling_method_vs_predict_time.pdf
+ - ${item.dataset}/plots/condensing_method_vs_accuracy.pdf
+ - ${item.dataset}/plots/condensing_method_vs_train_time.pdf
+ - ${item.dataset}/plots/condensing_method_vs_predict_time.pdf
params:
- conf/condensed_plots.yaml:
+ - cat_plot
+ ##############################################################################
+ merge_datasets:
+ cmd: >-
+ python merge.py
+ --big_dir .
+ --little_dir .
+ --data_file sms_spam/plots/merged.csv
+ --little_dir_data_file kdd_nsl/plots/merged.csv ddos/plots/merged.csv truthseeker/plots/merged.csv kdd_nsl/plots/condensed_merged.csv ddos/plots/condensed_merged.csv truthseeker/plots/condensed_merged.csv sms_spam/plots/condensed_merged.csv
+ --output_folder combined/plots/
+ --output_file merged.csv
+ deps:
+ - sms_spam/plots/merged.csv
+ - kdd_nsl/plots/merged.csv
+ - ddos/plots/merged.csv
+ - truthseeker/plots/merged.csv
+ outs:
+ - combined/plots/merged.csv
+ ##############################################################################
+ plot_merged:
+ cmd: >-
+ python -m deckard.layers.plots
+ --path combined/plots/
+ --file combined/plots/merged.csv
+ -c conf/merged_plots.yaml
+ deps:
+ - combined/plots/merged.csv
+ - conf/merged_plots.yaml
+ plots:
+ - combined/plots/compressor_metric_vs_accuracy.pdf
+ - combined/plots/compressor_metric_vs_train_time.pdf
+ - combined/plots/compressor_metric_vs_predict_time.pdf
+ - combined/plots/string_metric_vs_accuracy.pdf
+ - combined/plots/string_metric_vs_train_time.pdf
+ - combined/plots/string_metric_vs_predict_time.pdf
+ - combined/plots/symmetric_models_vs_accuracy.pdf
+ - combined/plots/symmetric_models_vs_train_time.pdf
+ - combined/plots/symmetric_models_vs_predict_time.pdf
+ - combined/plots/condensing_methods_vs_accuracy.pdf
+ - combined/plots/condensing_methods_vs_train_time.pdf
+ - combined/plots/condensing_methods_vs_predict_time.pdf
+ - combined/plots/models_vs_accuracy.pdf
+ - combined/plots/models_vs_train_time.pdf
+ - combined/plots/models_vs_predict_time.pdf
+ params:
+ - conf/merged_plots.yaml:
+ - cat_plot
+ - conf/merged_plots.yaml:
- line_plot
- # copy:
- # matrix:
- # dataset : [kdd_nsl, truthseeker, sms_spam, ddos]
- # cmd: >-
- # rm -rf ~/Gzip-KNN/figs/${item.dataset}/ &&
- # mkdir -p ~/Gzip-KNN/figs/${item.dataset}/ &&
- # cp -r ${item.dataset}/plots/* ~/Gzip-KNN/figs/${item.dataset}/
- # deps:
- # - ${item.dataset}/plots/
+ copy:
+ matrix:
+ dataset : [kdd_nsl, truthseeker, sms_spam, ddos, combined]
+ cmd: >-
+ rm -rf ~/Gzip-KNN/figs/${item.dataset}/ &&
+ mkdir -p ~/Gzip-KNN/figs/${item.dataset}/ &&
+ cp -r ${item.dataset}/plots/* ~/Gzip-KNN/figs/${item.dataset}/ &&
+ rm -rf ~/Gzip-KNN/figs/${item.dataset}/.gitignore
+ deps:
+ - ${item.dataset}/plots/
# ##############################################################################
# # attack:
# # cmd: python -m deckard.layers.experiment attack
diff --git a/examples/gzip/gzip_classifier.py b/examples/gzip/gzip_classifier.py
index 49d4e159..fb4aef27 100644
--- a/examples/gzip/gzip_classifier.py
+++ b/examples/gzip/gzip_classifier.py
@@ -16,6 +16,7 @@
# python -m pip install numpy scikit-learn tqdm scikit-learn-extra pandas imbalanced-learn
import numpy as np
+import warnings
import gzip
from tqdm import tqdm
from pathlib import Path
@@ -33,6 +34,7 @@
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn_extra.cluster import KMedoids
+from sklearn.exceptions import DataConversionWarning
from imblearn.under_sampling import (
CondensedNearestNeighbour,
NearMiss,
@@ -46,43 +48,46 @@
from batchMixin import BatchedMixin
+warnings.simplefilter(action="ignore", category=FutureWarning)
+warnings.simplefilter(action="ignore", category=UserWarning)
+
logger = logging.getLogger(__name__)
-def _gzip_compressor(x):
+def _gzip_len(x):
return len(gzip.compress(str(x).encode()))
-def _lzma_compressor(x):
+def _lzma_len(x):
import lzma
return len(lzma.compress(str(x).encode()))
-def _bz2_compressor(x):
+def _bz2_len(x):
import bz2
return len(bz2.compress(str(x).encode()))
-def _zstd_compressor(x):
+def _zstd_len(x):
import zstd
return len(zstd.compress(str(x).encode()))
-def _pickle_compressor(x):
+def _pickle_len(x):
import pickle
return len(pickle.dumps(x))
compressors = {
- "gzip": _gzip_compressor,
- "lzma": _lzma_compressor,
- "bz2": _bz2_compressor,
- "zstd": _zstd_compressor,
- "pkl": _pickle_compressor,
+ "gzip": _gzip_len,
+ "lzma": _lzma_len,
+ "bz2": _bz2_len,
+ "zstd": _zstd_len,
+ "pkl": _pickle_len,
}
@@ -102,15 +107,15 @@ def ncd(
float: The normalized compression distance between x1 and x2
"""
- compressor = (
+ compressor_len = (
compressors[method] if method in compressors.keys() else compressors["gzip"]
)
x1 = str(x1)
x2 = str(x2)
- Cx1 = compressor(x1) if cx1 is None else cx1
- Cx2 = compressor(x2) if cx2 is None else cx2
+ Cx1 = compressor_len(x1) if cx1 is None else cx1
+ Cx2 = compressor_len(x2) if cx2 is None else cx2
x1x2 = " ".join([x1, x2])
- Cx1x2 = compressor(x1x2)
+ Cx1x2 = compressor_len(x1x2)
min_ = min(Cx1, Cx2)
max_ = max(Cx1, Cx2)
ncd = (Cx1x2 - min_) / max_
@@ -131,6 +136,17 @@ def ncd(
**string_metrics,
}
+all_condensers = [
+ "sum",
+ "mean",
+ "medoid",
+ "random",
+ "knn",
+ "svc",
+ "hardness",
+ "nearmiss",
+]
+
def _calculate_string_distance(x1, x2, method):
x1 = str(x1)
@@ -182,7 +198,6 @@ def __init__(
distance_matrix=None,
metric="gzip",
symmetric=False,
- precompute=True,
**kwargs,
):
"""
@@ -197,24 +212,23 @@ def __init__(
If a path is provided, the file will be loaded. If an array is provided, it will be used directly.
Default is None.
symmetric (bool): If True, the distance matrix will be treated as symmetric. Default is False.
- precompute (bool): If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_.
Raises:
ValueError: If distance_matrix is not a path to a numpy file or a numpy array.
NotImplementedError: If the metric is not supported.
"""
kwarg_string = str([f"{key}={value}" for key, value in kwargs.items()])
- logger.info(
- f"Initializing GzipClassifier with m={m}, method={sampling_method}, distance_matrix={distance_matrix}, metric={metric}, symmetric={symmetric}, precompute={precompute}, {kwarg_string}",
+ logger.debug(
+ f"Initializing GzipClassifier with m={m}, method={sampling_method}, distance_matrix={distance_matrix}, metric={metric}, symmetric={symmetric}, {kwarg_string}",
)
self.m = m
self.sampling_method = sampling_method
if metric in compressors.keys():
- logger.info(f"Using NCD metric with {metric} compressor.")
+ logger.debug(f"Using NCD metric with {metric} compressor.")
self._distance = ncd
self.metric = metric
elif metric in string_metrics.keys():
- logger.info(f"Using {metric} metric")
+ logger.debug(f"Using {metric} metric")
self._distance = _calculate_string_distance
self.metric = metric
else:
@@ -231,7 +245,6 @@ def __init__(
self._calculate_distance_matrix = (
self._calculate_rectangular_distance_matrix
)
- self.precompute = precompute # If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_.
self.distance_matrix = distance_matrix
for key, value in kwargs.items():
setattr(self, key, value)
@@ -258,6 +271,7 @@ def _calculate_rectangular_distance_matrix(
desc="Calculating asymmetric distance matrix.",
leave=False,
dynamic_ncols=True,
+ position=2,
)
Cx1 = Cx1 if Cx1 is not None else [None] * len(x1)
Cx2 = Cx2 if Cx2 is not None else [None] * len(x2)
@@ -310,6 +324,7 @@ def _calculate_lower_triangular_distance_matrix(
desc="Calculating symmetric distance metrix.",
leave=False,
dynamic_ncols=True,
+ position=0,
)
Cx1 = Cx1 if Cx1 is not None else [None] * len(x1)
Cx2 = Cx2 if Cx2 is not None else [None] * len(x2)
@@ -420,8 +435,20 @@ def _prepare_training_matrix(self, n_jobs=-1):
n_jobs=n_jobs,
)
self._save_distance_matrix(self.distance_matrix, distance_matrix)
- elif isinstance(self.distance_matrix, np.ndarray):
+ elif isinstance(self.distance_matrix, np.ndarray) and len(
+ self.distance_matrix,
+ ) == len(self.X_):
distance_matrix = self.distance_matrix
+ elif isinstance(self.distance_matrix, np.ndarray) and len(
+ self.distance_matrix,
+ ) != len(self.X_):
+ distance_matrix = self._calculate_distance_matrix(
+ self.X_,
+ self.X_,
+ Cx1=self.Cx_,
+ Cx2=self.Cx_,
+ n_jobs=n_jobs,
+ )
elif isinstance(self.distance_matrix, type(None)):
distance_matrix = self._calculate_distance_matrix(
self.X_,
@@ -434,6 +461,15 @@ def _prepare_training_matrix(self, n_jobs=-1):
raise ValueError(
f"distance_matrix must be a path to a numpy file or a numpy array, got {type(self.distance_matrix)}",
)
+ assert (
+ distance_matrix.shape[0] == distance_matrix.shape[1]
+ ), f"Distance matrix must be square, got {distance_matrix.shape}"
+ assert (
+ len(self.X_) == distance_matrix.shape[0]
+ ), f"Expected len(X) == {distance_matrix.shape[0]}"
+ assert (
+ len(self.y_) == distance_matrix.shape[0]
+ ), f"Expected len(y) == {distance_matrix.shape[0]}"
return distance_matrix
def _find_best_samples(self, method="medoid", n_jobs=-1):
@@ -521,15 +557,18 @@ def _find_best_samples(self, method="medoid", n_jobs=-1):
distance_matrix,
columns=list(range(len(distance_matrix))),
)
+ distance_matrix, y = model.fit_resample(distance_matrix, y)
y = pd.DataFrame(y, columns=["y"])
y.index = list(range(len(y)))
- distance_matrix, y = model.fit_resample(distance_matrix, y)
indices = y.index[: m * n_classes]
else:
raise NotImplementedError(f"Method {method} not supported")
+
+ if len(indices) > len(self.X_):
+ indices = indices[: len(self.X_)]
return indices
- def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1):
+ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1, X_test=None, y_test=None):
"""Fit the model using X as training data and y as target values. If self.m is not -1, the best m samples will be selected using the method specified in self.sampling_method.
Args:
@@ -540,7 +579,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1):
GzipClassifier: The fitted model
"""
assert len(X) == len(y), f"Expected {len(X)} == {len(y)}"
- logger.info(f"Fitting with X of shape {X.shape} and y of shape {y.shape}")
+ logger.debug(f"Fitting with X of shape {X.shape} and y of shape {y.shape}")
self.X_ = np.array(X) if not isinstance(X, np.ndarray) else X
y = np.array(y) if not isinstance(y, np.ndarray) else y
if len(np.squeeze(y).shape) == 1:
@@ -554,7 +593,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1):
flat_y = np.argmax(y, axis=1)
counts = np.bincount(flat_y)
self.counts_ = counts
- logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}")
+ logger.debug(f"Num Classes: {self.n_classes_}, counts: {counts}")
self.n_features_ = X.shape[1] if len(X.shape) > 1 else 1
self.classes_ = range(len(unique_labels(y)))
@@ -579,19 +618,18 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1):
elif self.m == -1:
distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs)
self.distance_matrix = distance_matrix
- elif self.m is None or self.m == 0:
- pass
else:
raise ValueError(
f"Expected {self.m} to be -1, 0, a positive integer or a float between 0 and 1. Got type {type(self.m)}",
)
- if self.precompute is True:
- self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs)
- self.clf_ = self.clf_.fit(self.distance_matrix, self.y_)
- else:
- raise NotImplementedError(
- f"Precompute {self.precompute} not supported for type(self.clf_) {type(self.clf_)}",
- )
+ self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs)
+ with warnings.catch_warnings():
+ warnings.filterwarnings("error")
+ try:
+ self.clf_ = self.clf_.fit(self.distance_matrix, self.y_)
+ except DataConversionWarning:
+ y = np.ravel(self.y_)
+ self.clf_ = self.clf_.fit(self.distance_matrix, y)
return self
def _set_best_indices(self, indices):
@@ -607,11 +645,9 @@ def _set_best_indices(self, indices):
indices
] # select the transposed columns at the indices
self.distance_matrix = distance_matrix.T # transpose the matrix again
- logger.info(
+ logger.debug(
f"Selected {len(self.X_)} samples using method {self.sampling_method}.",
)
- counts = np.bincount(np.argmax(self.y_, axis=1))
- logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}")
assert len(self.X_) == len(
self.y_,
), f"Expected {len(self.X_)} == {len(self.y_)}"
@@ -630,7 +666,7 @@ def predict(self, X: np.ndarray):
np.ndarray: The predicted class labels
"""
check_is_fitted(self)
- logger.info(f"Predicting with X of shape {X.shape}")
+ logger.debug(f"Predicting with X of shape {X.shape}")
if self.metric in compressors.keys():
compressor = compressors[self.metric]
Cx2 = Parallel(n_jobs=-1)(
@@ -687,7 +723,8 @@ def score(self, X: np.ndarray, y: np.ndarray):
return accuracy_score(y, y_pred)
-class BatchedGzipClassifier(GzipClassifier, BatchedMixin):
+class BatchedGzipClassifier(BatchedMixin, GzipClassifier):
+
pass
@@ -700,7 +737,6 @@ def __init__(
distance_matrix=None,
metric="gzip",
symmetric=False,
- precompute=True,
**kwargs,
):
super().__init__(
@@ -709,7 +745,6 @@ def __init__(
distance_matrix=distance_matrix,
metric=metric,
symmetric=symmetric,
- precompute=precompute,
**kwargs,
)
self.clf_ = KNeighborsClassifier(n_neighbors=k, metric="precomputed", **kwargs)
@@ -726,7 +761,7 @@ def predict(self, X: np.ndarray, n_jobs=-1):
"""
check_is_fitted(self)
- logger.info(f"Predicting with X of shape {X.shape}")
+ logger.debug(f"Predicting with X of shape {X.shape}")
# Pre-compress samples not working
if self.metric in compressors.keys():
compressor = compressors[self.metric]
@@ -760,31 +795,11 @@ def predict(self, X: np.ndarray, n_jobs=-1):
len(X),
len(self.X_),
), f"Expected {distance_matrix.shape} == ({len(X)}, {len(self.X_)})"
- y_pred = []
- if self.precompute is True:
- y_pred = self.clf_.predict(distance_matrix)
- else:
- for i in tqdm(
- range(len(X)),
- desc="Predicting",
- leave=False,
- total=len(X),
- dynamic_ncols=True,
- ):
- # Sort the distances and get the nearest k samples
- sorted_idx = np.argsort(distance_matrix[i])
- # Get the first k samples
- nearest_k = sorted_idx[: self.k]
- # Get the labels of the nearest samples
- nearest_labels = list(self.y_[nearest_k])
- # predict class
- unique, counts = np.unique(nearest_labels, return_counts=True)
- # Get the most frequent label
- y_pred.append(unique[np.argmax(counts)])
+ y_pred = self.clf_.predict(distance_matrix)
return y_pred
-class BatchedGzipKNN(GzipKNN, BatchedMixin):
+class BatchedGzipKNN(BatchedMixin, GzipKNN):
pass
@@ -796,14 +811,11 @@ def __init__(
distance_matrix=None,
metric="gzip",
symmetric=False,
- precompute=True,
**kwargs,
):
- self.precompute = precompute
clf = LogisticRegression(**kwargs)
super().__init__(
clf_=clf,
- precompute=precompute,
sampling_method=sampling_method,
m=m,
distance_matrix=distance_matrix,
@@ -813,7 +825,7 @@ def __init__(
)
-class BatchedGzipLogisticRegressor(GzipLogisticRegressor, BatchedMixin):
+class BatchedGzipLogisticRegressor(BatchedMixin, GzipLogisticRegressor):
pass
@@ -826,14 +838,11 @@ def __init__(
distance_matrix=None,
metric="gzip",
symmetric=False,
- precompute=True,
**kwargs,
):
- self.precompute = precompute
clf = SVC(kernel=kernel, **kwargs)
super().__init__(
clf_=clf,
- precompute=precompute,
sampling_method=sampling_method,
m=m,
distance_matrix=distance_matrix,
@@ -883,10 +892,13 @@ def test_model(
) -> dict:
"""
Args:
- X (np.ndarray): The input data
- y (np.ndarray): The target labels
- train_size (int): The number of samples to use for training. Default is 100.
- test_size (int): The number of samples to use for testing. Default is 100.
+ X_train (np.ndarray): The training data
+ X_test (np.ndarray): The test data
+ y_train (np.ndarray): The training labels
+ y_test (np.ndarray): The test labels
+ model_type (str): The type of model to use. Choices are "knn", "logistic", "svc".
+ optimizer (str): The metric to optimize. Choices are "accuracy", "f1", "precision", "recall".
+ batched (bool): If True, a batched model will be used. Default is False.
**kwargs: Additional keyword arguments to pass to the GzipClassifier
Returns:
dict: A dictionary containing the accuracy, train_time, and pred_time
@@ -898,7 +910,8 @@ def test_model(
alias = model_scorers[model_type]
scorer = scorers[alias]
start = time.time()
- model.fit(X_train, y_train)
+
+ model.fit(X_train, y_train, X_test=X_test, y_test=y_test)
check_is_fitted(model)
end = time.time()
train_time = end - start
@@ -909,7 +922,7 @@ def test_model(
score = round(scorer(y_test, predictions), 3)
print(f"Training time: {train_time}")
print(f"Prediction time: {pred_time}")
- print(f"{alias} is: {score}")
+ print(f"{alias.capitalize()} is: {score}")
score_dict = {
f"{alias.lower()}": score,
"train_time": train_time,
@@ -935,14 +948,9 @@ def load_data(dataset, precompressed):
LabelEncoder().fit(y).transform(y)
) # Turns the labels "alt.atheism" and "talk.religion.misc" into 0 and 1
elif dataset == "kdd_nsl":
- df = pd.read_csv("raw_data/kdd_nsl.csv")
- y = df["label"]
- X = df.drop("label", axis=1)
- elif dataset == "kdd_nsl":
- df = pd.read_csv("raw_data/kdd_nsl.csv")
+ df = pd.read_csv("raw_data/kdd_nsl_undersampled_10000.csv")
y = df["label"]
X = df.drop("label", axis=1)
- X = np.array(X)
elif dataset == "make_classification":
X, y = make_classification(
n_samples=1000,
@@ -952,7 +960,7 @@ def load_data(dataset, precompressed):
)
y = LabelEncoder().fit(y).transform(y)
elif dataset == "truthseeker":
- df = pd.read_csv("raw_data/truthseeker.csv")
+ df = pd.read_csv("raw_data/truthseeker_undersampled_8000.csv")
y = df["BotScoreBinary"]
X = df.drop("BotScoreBinary", axis=1)
elif dataset == "sms-spam":
@@ -1002,7 +1010,7 @@ def main(args: argparse.Namespace):
Args:
args (argparse.Namespace): The command line arguments
Usage:
- python gzip_classifier.py --compressor gzip --k 3 --m 100 --method random --distance_matrix distance_matrix --dataset kdd_nsl
+ python gzip_classifier.py --metric gzip --m 10 --sampling_method svc --dataset kdd_nsl k=3
"""
X, y = load_data(dataset=args.dataset, precompressed=args.precompressed)
@@ -1022,28 +1030,96 @@ def main(args: argparse.Namespace):
kwarg_args = params.pop("kwargs")
# conver list of key-value pairs to dictionary
kwarg_args = dict([arg.split("=") for arg in kwarg_args])
+ for k, v in kwarg_args.items():
+ # Typecast the values to the correct type
+ try:
+ kwarg_args[k] = eval(v)
+ except: # noqa E722
+ kwarg_args[k] = v
params.update(**kwarg_args)
- params["precompute"] = True
X = np.array(X) if not isinstance(X, np.ndarray) else X
y = np.array(y) if not isinstance(y, np.ndarray) else y
test_model(X_train, X_test, y_train, y_test, **params)
parser = argparse.ArgumentParser()
-parser.add_argument("--model_type", type=str, default="knn")
-parser.add_argument("--precompute", action="store_true")
-parser.add_argument("--symmetric", action="store_true")
-parser.add_argument("--metric", type=str, default="gzip", choices=all_metrics)
-parser.add_argument("--m", type=int, default=-1)
-parser.add_argument("--sampling_method", type=str, default="random")
-parser.add_argument("--distance_matrix", type=str, default=None)
-parser.add_argument("--dataset", type=str, default="kdd_nsl")
-parser.add_argument("--train_size", type=int, default=100)
-parser.add_argument("--test_size", type=int, default=100)
-parser.add_argument("--optimizer", type=str, default="accuracy")
-parser.add_argument("--precompressed", action="store_true")
-parser.add_argument("--random_state", type=int, default=42)
-parser.add_argument("kwargs", nargs=argparse.REMAINDER)
+parser.add_argument(
+ "--model_type",
+ type=str,
+ default="knn",
+ help="The type of model to use. Choices are knn, logistic, svc",
+)
+parser.add_argument(
+ "--symmetric",
+ action="store_true",
+ help="If True, the distance matrix will be treated as symmetric. Default is False.",
+)
+parser.add_argument(
+ "--metric",
+ type=str,
+ default="gzip",
+ choices=all_metrics,
+ help=f"The metric used to calculate the distance between samples. Choices are {list(all_metrics.keys())}",
+)
+parser.add_argument(
+ "--m",
+ type=int,
+ default=-1,
+ help="The number of best samples to use. If -1, all samples will be used.",
+)
+parser.add_argument(
+ "--sampling_method",
+ type=str,
+ default="random",
+ help=f"The method used to select the best training samples. Choices are {all_condensers}",
+)
+parser.add_argument(
+ "--distance_matrix",
+ type=str,
+ default=None,
+ help="The path to a numpy array representing the distance matrix. If a path is provided, the file will be loaded. Default is None.",
+)
+parser.add_argument(
+ "--dataset",
+ type=str,
+ default="kdd_nsl",
+ help="The dataset to use. Choices are 20newsgroups, kdd_nsl, make_classification, truthseeker, sms-spam, ddos.",
+)
+parser.add_argument(
+ "--train_size",
+ type=int,
+ default=100,
+ help="The number of samples to use for training. Default is 100.",
+)
+parser.add_argument(
+ "--test_size",
+ type=int,
+ default=100,
+ help="The number of samples to use for testing. Default is 100.",
+)
+parser.add_argument(
+ "--optimizer",
+ type=str,
+ default="accuracy",
+ help="The metric to use for optimization. Default is accuracy.",
+)
+parser.add_argument(
+ "--precompressed",
+ action="store_true",
+ help="If True, the data will be precompressed using gzip.",
+)
+parser.add_argument(
+ "--random_state",
+ type=int,
+ default=42,
+ help="The random state to use. Default is 42.",
+)
+parser.add_argument(
+ "kwargs",
+ nargs=argparse.REMAINDER,
+ help="Additional keyword arguments to pass to the GzipClassifier",
+)
+
if __name__ == "__main__":
args = parser.parse_args()
diff --git a/examples/gzip/objective.py b/examples/gzip/objective.py
new file mode 100644
index 00000000..39e4185e
--- /dev/null
+++ b/examples/gzip/objective.py
@@ -0,0 +1,54 @@
+import optuna
+from gzip_classifier import all_metrics
+
+
+def objective(trial: optuna.Trial):
+ model_type = trial.suggest_categorical("model_type", ["knn", "logistic", "svc"])
+ metric = trial.suggest_categorical("model.init.metric", all_metrics.keys())
+ if model_type == "knn":
+ k = trial.suggest_categorical("k", [3, 5, 7, 9, 11])
+ weights = trial.suggest_categorical("weights", ["uniform", "distance"])
+ algorithm = trial.suggest_categorical("algorithm", ["brute"])
+ params = {"k": k, "weights": weights, "algorithm": algorithm}
+ elif model_type == "logistic":
+ C = trial.suggest_loguniform("C", 1e-10, 1e10)
+ solver = trial.suggest_categorical("solver", ["saga"])
+ penalty = trial.suggest_categorical("penalty", ["l1", "l2", None])
+ fit_intercept = trial.suggest_categorical("fit_intercept", [True, False])
+ class_weight = trial.suggest_categorical("class_weight", ["balanced", None])
+ params = {
+ "C": C,
+ "solver": solver,
+ "penalty": penalty,
+ "fit_intercept": fit_intercept,
+ "class_weight": class_weight,
+ }
+ elif model_type == "svc":
+ C = trial.suggest_loguniform("C", 1e-10, 1e10)
+ kernel = trial.suggest_categorical(
+ "kernel",
+ ["linear", "rbf", "poly", "sigmoid"],
+ )
+ class_weight = trial.suggest_categorical("class_weight", ["balanced", None])
+ if kernel == "poly":
+ degree = trial.suggest_int("degree", 2, 5)
+ params = {
+ "C": C,
+ "kernel": kernel,
+ "degree": degree,
+ "class_weight": class_weight,
+ }
+ elif kernel == "rbf":
+ gamma = trial.suggest_categorical("gamma", ["auto", "scale"])
+ params = {
+ "C": C,
+ "kernel": kernel,
+ "gamma": gamma,
+ "class_weight": class_weight,
+ }
+ else:
+ params = {"C": C, "kernel": kernel, "class_weight": class_weight}
+ else:
+ raise NotImplementedError(f"Model type {model_type} not supported.")
+ params["metric"] = metric
+ params["model_name"] = f"{metric}_{model_type}"
diff --git a/examples/gzip/params.yaml b/examples/gzip/params.yaml
deleted file mode 100644
index 43dbcb17..00000000
--- a/examples/gzip/params.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-data:
- _target_: deckard.base.data.Data
- name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
-dataset: kdd_nsl
-direction:
-- maximize
-files:
- _target_: deckard.base.files.FileConfig
- attack_dir: attacks
- attack_file: attack
- attack_type: .pkl
- data_dir: data
- data_file: data
- data_type: .pkl
- directory: output
- model_dir: model
- model_file: model
- model_type: .pkl
- name: default
- params_file: params.yaml
- predictions_file: predictions.json
- reports: reports
- score_dict_file: score_dict.json
-model:
- _target_: deckard.base.model.Model
- art:
- _target_: deckard.base.model.art_pipeline.ArtPipeline
- initialize:
- nb_classes: 3
- library: sklearn
- data:
- _target_: deckard.base.data.Data
- name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv
- sample:
- _target_: deckard.base.data.SklearnDataSampler
- random_state: 0
- stratify: true
- test_size: 100
- train_size: 100
- sklearn_pipeline:
- encoder:
- handle_unknown: use_encoded_value
- name: sklearn.preprocessing.OrdinalEncoder
- unknown_value: -1
- preprocessor:
- name: sklearn.preprocessing.StandardScaler
- with_mean: true
- with_std: true
- target: label
- init:
- _target_: deckard.base.model.ModelInitializer
- compressor: gzip
- distance_matrix: output/model/kdd_nsl/gzip_classifier/gzip/0-100.npz
- k: 1
- m: -1
- method: random
- name: gzip_classifier.GzipClassifier
- library: sklearn
-model_name: gzip_classifier
-optimizers:
-- accuracy
-scorers:
- _target_: deckard.base.scorer.ScorerDict
- accuracy:
- _target_: deckard.base.scorer.ScorerConfig
- direction: maximize
- name: sklearn.metrics.accuracy_score
- log_loss:
- _target_: deckard.base.scorer.ScorerConfig
- direction: minimize
- name: sklearn.metrics.log_loss
-stage: train
diff --git a/examples/pytorch/cifar10/.dvc/tmp/rwlock b/examples/pytorch/cifar10/.dvc/tmp/rwlock
new file mode 100644
index 00000000..0967ef42
--- /dev/null
+++ b/examples/pytorch/cifar10/.dvc/tmp/rwlock
@@ -0,0 +1 @@
+{}
diff --git a/examples/security/classification/.gitignore b/examples/security/classification/.gitignore
index 8a746d89..273db2f4 100644
--- a/examples/security/classification/.gitignore
+++ b/examples/security/classification/.gitignore
@@ -1,3 +1,4 @@
logs/
multirun/
output/
+/retrain
diff --git a/examples/security/classification/dvc.lock b/examples/security/classification/dvc.lock
index 01a4ce87..a0fe541c 100644
--- a/examples/security/classification/dvc.lock
+++ b/examples/security/classification/dvc.lock
@@ -329,8 +329,8 @@ stages:
size: 950
- path: models.sh
hash: md5
- md5: 45472713dfccf0cd62509e7d62e223fa
- size: 5807
+ md5: 509157bdd5b524a21b8294dc2409a969
+ size: 5887
- path: output/reports/train/default/params.yaml
hash: md5
md5: d4e0a34b2b15765ca71fa5ecaf7e3826
@@ -425,75 +425,77 @@ stages:
outs:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
compile_models:
cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file
output/train.csv
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/reports/train/
hash: md5
- md5: fae483c6435daa9d29c947f2bce41511.dir
- size: 512957700
- nfiles: 9852
+ md5: 702efbf0ca05f21241fbfcbaeac9712b.dir
+ size: 52545076
+ nfiles: 1548
outs:
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
find_best_model@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_rbf --study_name=rbf_100_10000 --default_config model.yaml
+ --params_file best_rbf --study_name=rbf_100_10000 --default_config default.yaml
+ --storage_name sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 0a90767d020934a3cd6d0c42a6f21606
- size: 357
+ md5: 4932ceac75d6256ce2a7864aa4a5ea3c
+ size: 359
find_best_model@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_linear --study_name=linear_100_10000 --default_config model.yaml
+ --params_file best_linear --study_name=linear_100_10000 --default_config default.yaml
+ --storage_name sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/model/best_linear.yaml
hash: md5
@@ -501,25 +503,26 @@ stages:
size: 332
find_best_model@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_poly --study_name=poly_100_10000 --default_config model.yaml
+ --params_file best_poly --study_name=poly_100_10000 --default_config default.yaml
+ --storage_name sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/model/best_poly.yaml
hash: md5
- md5: a9d600cc46e9f49c3a0cca90f7c7d876
+ md5: bd9e29f3e2e34263e48401a682a84a06
size: 370
attacks:
cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml
@@ -530,34 +533,34 @@ stages:
size: 332
- path: conf/model/best_poly.yaml
hash: md5
- md5: a9d600cc46e9f49c3a0cca90f7c7d876
+ md5: bd9e29f3e2e34263e48401a682a84a06
size: 370
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 0a90767d020934a3cd6d0c42a6f21606
- size: 357
+ md5: 4932ceac75d6256ce2a7864aa4a5ea3c
+ size: 359
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: attack.db
hash: md5
- md5: 79ab050e04b70e212f1be85f09a974ef
- size: 2334720
+ md5: e4f26ccdc30870d9fea230d7e2f3d517
+ size: 303104
- path: logs/attacks/
hash: md5
- md5: 4eabc469a5a951cd423da83bbd47c264.dir
- size: 926809
+ md5: 9d63507c9eccf50f94d1e8bcca1e9b9a.dir
+ size: 876433
nfiles: 3
compile_attacks:
cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file
@@ -565,89 +568,92 @@ stages:
deps:
- path: attack.db
hash: md5
- md5: 79ab050e04b70e212f1be85f09a974ef
- size: 2334720
+ md5: e4f26ccdc30870d9fea230d7e2f3d517
+ size: 303104
- path: logs/attacks/
hash: md5
- md5: 4eabc469a5a951cd423da83bbd47c264.dir
- size: 926809
+ md5: 9d63507c9eccf50f94d1e8bcca1e9b9a.dir
+ size: 876433
nfiles: 3
- path: output/reports/attack/
hash: md5
- md5: f610f016b9a97c37ff59de361311e5b1.dir
- size: 7978562
- nfiles: 486
+ md5: e8550da3b609d9d52ee496b0cbda8dcd.dir
+ size: 20185965
+ nfiles: 1089
outs:
- path: output/attack.csv
hash: md5
- md5: f89e17affa7e38b4955ea3edc4661f9c
- size: 188715
+ md5: e83df99bc4ec73458235032d34d479a3
+ size: 395210
find_best_attack@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_linear --study_name=best_linear --default_config attack.yaml
+ --params_file best_linear --study_name=best_linear --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: 4bb6215963ae7f0025f72ec31e26f29d
- size: 244
+ md5: b7ef4b4d709a4511ebd4f0a5e9002cdb
+ size: 248
find_best_attack@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_rbf --study_name=best_rbf --default_config attack.yaml
+ --params_file best_rbf --study_name=best_rbf --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: eca3091f7c0eb0b8958bc6becf43191d
- size: 244
+ md5: 74476a2360110c0c8c4e728857da2472
+ size: 252
find_best_attack@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_poly --study_name=best_poly --default_config attack.yaml
+ --params_file best_poly --study_name=best_poly --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: logs/models/
hash: md5
- md5: d9c5585db1b343a23229a2fb5e77cbef.dir
- size: 4828874
- nfiles: 60
+ md5: fd9e6aad79d8a1be29d42da86fd11a98.dir
+ size: 1366301
+ nfiles: 24
- path: model.db
hash: md5
- md5: de6e467e793b2519ea5db993786e263e
- size: 4870144
+ md5: 676963d31977a42501b4243cb25ab935
+ size: 593920
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
outs:
- path: conf/attack/best_poly.yaml
hash: md5
- md5: b5f8f874e44dbc8bdb0ababc67295174
- size: 246
+ md5: 0e7533628e42f20dc5a34c35e2fb701a
+ size: 250
other_data_train@kdd_nsl:
cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml
deps:
@@ -683,109 +689,110 @@ stages:
deps:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: 4bb6215963ae7f0025f72ec31e26f29d
- size: 244
+ md5: b7ef4b4d709a4511ebd4f0a5e9002cdb
+ size: 248
- path: conf/attack/best_poly.yaml
hash: md5
- md5: b5f8f874e44dbc8bdb0ababc67295174
- size: 246
+ md5: 0e7533628e42f20dc5a34c35e2fb701a
+ size: 250
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: eca3091f7c0eb0b8958bc6becf43191d
- size: 244
+ md5: 74476a2360110c0c8c4e728857da2472
+ size: 252
- path: conf/model/best_linear.yaml
hash: md5
md5: 23a7c49f5a8ddf63a7ac89fb61c0034d
size: 332
- path: conf/model/best_poly.yaml
hash: md5
- md5: a9d600cc46e9f49c3a0cca90f7c7d876
+ md5: bd9e29f3e2e34263e48401a682a84a06
size: 370
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 0a90767d020934a3cd6d0c42a6f21606
- size: 357
+ md5: 4932ceac75d6256ce2a7864aa4a5ea3c
+ size: 359
- path: output/attacks/
hash: md5
- md5: 2706070162d082792d7b52629d691d15.dir
- size: 2410072
- nfiles: 61
- - path: output/models/
- hash: md5
- md5: c7222ada919037fb45b73e4f6c1f88a2.dir
- size: 70825596
- nfiles: 1244
+ md5: 658e0a848877fbafbddd62ec5dd22dc3.dir
+ size: 4819192
+ nfiles: 121
outs:
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: 8838aabe00dcca60ae5c5681174bfc7f
- size: 18011
+ md5: c2273c7a9d789de1939d5006a7a087eb
+ size: 326367
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: edc0f782bfd97743823318d6b14d5d14
- size: 17994
+ md5: 1a52061abda8e60e503ea271439b8f8a
+ size: 326350
- path: retrain/
hash: md5
- md5: 062d1374edb8e366a1c65308fa4fdfbc.dir
- size: 176883
+ md5: 22c8403d05f0f866398b504f6f3c4d37.dir
+ size: 173285
nfiles: 12
plots:
cmd: python plots.py
deps:
- path: output/attack.csv
hash: md5
- md5: f89e17affa7e38b4955ea3edc4661f9c
- size: 188715
+ md5: e83df99bc4ec73458235032d34d479a3
+ size: 395210
- path: output/train.csv
hash: md5
- md5: a048280df159bb5ee1ce118d0d3cfd14
- size: 3559023
+ md5: f0e4e7434085d033c5038fb1723acc25
+ size: 610341
+ - path: plots.py
+ hash: md5
+ md5: d7b45f7ef670728e8a238909265334f2
+ size: 12114
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: 8838aabe00dcca60ae5c5681174bfc7f
- size: 18011
+ md5: c2273c7a9d789de1939d5006a7a087eb
+ size: 326367
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: edc0f782bfd97743823318d6b14d5d14
- size: 17994
+ md5: 1a52061abda8e60e503ea271439b8f8a
+ size: 326350
outs:
- path: plots/accuracy_vs_attack_parameters.eps
hash: md5
- md5: 62ba219171d53a6d7bee9adaaa5dcae2
- size: 41249
+ md5: 13be25e57708a0b2e7c6d062ad310b97
+ size: 38999
- path: plots/accuracy_vs_features.eps
hash: md5
- md5: 45d51ca30fc0e46849609941fc4cbb53
- size: 21450
+ md5: 3cf6dc9eb9913ab3babc82002abc5ad4
+ size: 21548
- path: plots/accuracy_vs_samples.eps
hash: md5
- md5: c7bba36d352106cdeee655e01870bdcf
- size: 23719
+ md5: be2def33826b2131795cf599a87f12de
+ size: 25049
- path: plots/confidence_vs_attack_parameters.eps
hash: md5
- md5: c2887dfae9cdfbb24d9d15d3655c3c87
- size: 40822
+ md5: 24d6d00ad927000bc60ab2012f56520c
+ size: 41436
- path: plots/retrain_accuracy.eps
hash: md5
- md5: 25d6d1ec08dc127bcd04470ca476d146
- size: 23419
+ md5: 2b62b83a5b7a37c16d25319602e102f4
+ size: 30833
- path: plots/retrain_confidence_vs_attack_parameters.eps
hash: md5
- md5: 5a6969fefe91e5c675600e07d8bff580
- size: 40819
+ md5: 860ffadab6254488091c8bc1c619f56c
+ size: 41628
- path: plots/retrain_time.eps
hash: md5
- md5: 2d28bfca3ebb7ef3b7b4fbfb69eb045f
- size: 20957
+ md5: e32d6c3cc459943ea418eea1e20fdc2f
+ size: 28407
- path: plots/train_time_vs_attack_parameters.eps
hash: md5
- md5: f56d1fc7846df9a1276749a9bd5675e9
- size: 38521
+ md5: 5e88339288029b1f53f7f02d6a88bafe
+ size: 39252
- path: plots/train_time_vs_features.eps
hash: md5
- md5: a3300cdd85533e51ce108c4f141376f6
- size: 20644
+ md5: 2bf86c698e490164eb5fe4f76743f21b
+ size: 19529
- path: plots/train_time_vs_samples.eps
hash: md5
- md5: 15f3f109c2f09c01edc6bc0e68786ce6
- size: 24036
+ md5: 99b6bb26684bccd5092e92e095f2b484
+ size: 24348
+ move_files:
+ cmd: 'cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/generated/ '
diff --git a/examples/security/classification/dvc.yaml b/examples/security/classification/dvc.yaml
index e44f6357..4ee7d639 100644
--- a/examples/security/classification/dvc.yaml
+++ b/examples/security/classification/dvc.yaml
@@ -74,7 +74,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item}_100_10000 --default_config model.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item}_100_10000 --default_config default.yaml --storage_name sqlite:///model.db
outs:
- conf/model/best_${item}.yaml
deps:
@@ -112,7 +112,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize
outs:
- conf/attack/best_${item}.yaml
deps:
@@ -122,7 +122,6 @@ stages:
retrain:
cmd : python retrain.py
deps:
- - ${files.directory}/models/
- ${files.directory}/attacks/
- conf/attack/best_linear.yaml
- conf/attack/best_rbf.yaml
@@ -142,6 +141,7 @@ stages:
- output/train.csv
- plots/before_retrain_confidence.csv
- output/attack.csv
+ - plots.py
plots :
- plots/accuracy_vs_attack_parameters.eps
- plots/accuracy_vs_features.eps
@@ -153,3 +153,7 @@ stages:
- plots/retrain_accuracy.eps
- plots/retrain_confidence_vs_attack_parameters.eps
- plots/retrain_time.eps
+ move_files:
+ cmd: >-
+ cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/generated/
+ #&& rm ~/KDD-Paper-EAI-AISEC/generated/.gitignore
diff --git a/examples/security/classification/plots.py b/examples/security/classification/plots.py
index 3e515da7..b815a223 100644
--- a/examples/security/classification/plots.py
+++ b/examples/security/classification/plots.py
@@ -18,12 +18,9 @@
# else:
# results = parse_results("reports/model_queue/")
results = pd.read_csv("output/train.csv")
-input_size = (
- results["data.generate.kwargs.n_samples"]
- * results["data.generate.kwargs.n_features"]
-)
-results["Kernel"] = results["model.init.kwargs.kernel"].copy()
-results["Features"] = results["data.generate.kwargs.n_features"].copy()
+input_size = results["data.generate.n_samples"] * results["data.generate.n_features"]
+results["Kernel"] = results["model.init.kernel"].copy()
+results["Features"] = results["data.generate.n_features"].copy()
results["Samples"] = results["data.sample.train_size"].copy()
results["input_size"] = input_size
if "Unnamed: 0" in results.columns:
@@ -31,11 +28,11 @@
for col in results.columns:
if col == "data.name" and isinstance(results[col][0], list):
results[col] = results[col].apply(lambda x: x[0])
-results = results[results["model.init.kwargs.kernel"] != "sigmoid"]
+results = results[results["model.init.kernel"] != "sigmoid"]
attack_results = pd.read_csv("output/attack.csv")
-attack_results["Kernel"] = attack_results["model.init.kwargs.kernel"].copy()
-attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy()
+attack_results["Kernel"] = attack_results["model.init.kernel"].copy()
+attack_results["Features"] = attack_results["data.generate.n_features"].copy()
attack_results["Samples"] = attack_results["data.sample.train_size"].copy()
if "Unnamed: 0" in attack_results.columns:
del attack_results["Unnamed: 0"]
@@ -50,6 +47,8 @@
data=results,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph1.legend(labels=["Linear", "RBF", "Poly"])
graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
@@ -62,11 +61,13 @@
plt.gcf().clear()
graph2 = sns.lineplot(
- x="data.generate.kwargs.n_features",
+ x="data.generate.n_features",
y="accuracy",
data=results,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph2.set_xlabel("Number of Features")
graph2.set_ylabel("Accuracy")
@@ -78,11 +79,13 @@
graph3 = sns.lineplot(
- x="data.generate.kwargs.n_features",
+ x="data.generate.n_features",
y="train_time",
data=results,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph3.set_xlabel("Number of Features")
graph3.set_ylabel("Training Time")
@@ -98,6 +101,8 @@
data=results,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph4.set_xlabel("Number of Samples")
graph4.set_ylabel("Training Time")
@@ -109,7 +114,7 @@
fig, ax = plt.subplots(2, 2)
graph5 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -117,20 +122,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy")
graph6 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="accuracy",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy")
graph7 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -138,10 +147,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy")
graph8 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -149,6 +160,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy")
graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
@@ -158,7 +171,7 @@
fig, ax = plt.subplots(2, 2)
graph9 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -166,20 +179,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time")
graph10 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="adv_fit_time",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time")
graph11 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -187,10 +204,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time")
graph12 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -198,6 +217,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
@@ -225,6 +246,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -234,6 +257,8 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
@@ -250,6 +275,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -259,6 +286,8 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
@@ -279,6 +308,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -289,6 +320,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -300,6 +333,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -311,6 +346,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
@@ -330,6 +367,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -340,6 +379,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -351,6 +392,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -362,6 +405,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
diff --git a/examples/security/classification/plots/.gitignore b/examples/security/classification/plots/.gitignore
new file mode 100644
index 00000000..4c882c2e
--- /dev/null
+++ b/examples/security/classification/plots/.gitignore
@@ -0,0 +1,10 @@
+/accuracy_vs_attack_parameters.eps
+/accuracy_vs_features.eps
+/accuracy_vs_samples.eps
+/confidence_vs_attack_parameters.eps
+/train_time_vs_attack_parameters.eps
+/train_time_vs_features.eps
+/train_time_vs_samples.eps
+/retrain_accuracy.eps
+/retrain_confidence_vs_attack_parameters.eps
+/retrain_time.eps
diff --git a/examples/security/classification/retrain.py b/examples/security/classification/retrain.py
index 9623e19d..8ae973e0 100644
--- a/examples/security/classification/retrain.py
+++ b/examples/security/classification/retrain.py
@@ -344,7 +344,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
params = json.load(f)
else:
raise ValueError(f"No params file found for {folder}")
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence"] = avg_prob
@@ -432,7 +432,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
else:
logger.warning(f"No params file found for {folder}")
continue
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence After Retraining"] = avg_prob
diff --git a/examples/security/kdd-nsl/.gitignore b/examples/security/kdd-nsl/.gitignore
index 8a746d89..273db2f4 100644
--- a/examples/security/kdd-nsl/.gitignore
+++ b/examples/security/kdd-nsl/.gitignore
@@ -1,3 +1,4 @@
logs/
multirun/
output/
+/retrain
diff --git a/examples/security/kdd-nsl/attacks.sh b/examples/security/kdd-nsl/attacks.sh
index 76ed02bc..8b53b739 100644
--- a/examples/security/kdd-nsl/attacks.sh
+++ b/examples/security/kdd-nsl/attacks.sh
@@ -11,7 +11,7 @@ for model_config in $CONFIG_NAMES; do
continue
fi
HYDRA_FULL_ERROR=1 python -m deckard.layers.optimise \
- ++model.init.kernel=kernel_name \
+ ++model.init.kernel=${kernel_name}\
++stage=attack \
++attack.init.name=art.attacks.evasion.ProjectedGradientDescent \
++attack.init.norm=1,2,inf \
@@ -21,6 +21,7 @@ for model_config in $CONFIG_NAMES; do
++attack.init.max_iter=1,10,100,1000 \
++hydra.sweeper.study_name=$model_config \
++attack.attack_size=100 \
+ direction=minimize \
model=$model_config $@ --multirun >> logs/attacks/$model_config.log
echo "Successfully completed model $model_config" >> attack_log.txt
done
diff --git a/examples/security/kdd-nsl/dvc.lock b/examples/security/kdd-nsl/dvc.lock
index 9497e7e0..c2fecd0f 100644
--- a/examples/security/kdd-nsl/dvc.lock
+++ b/examples/security/kdd-nsl/dvc.lock
@@ -94,39 +94,39 @@ stages:
outs:
- path: output/reports/train/default/params.yaml
hash: md5
- md5: 7234aab7d5edae504afa2090d96e4c3f
- size: 2434
+ md5: 6225c0aefe4059bfae7f5b0e04ae549a
+ size: 2189
- path: output/reports/train/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/score_dict.json
hash: md5
- md5: 8869350701c8b1b367cdb1a33ab572d9
- size: 360
+ md5: cc368afafd0e89f04fb0ae89e64f5e0d
+ size: 716
attack:
cmd: python -m deckard.layers.experiment attack
deps:
- path: output/reports/train/default/params.yaml
hash: md5
- md5: 7234aab7d5edae504afa2090d96e4c3f
- size: 2434
+ md5: 6225c0aefe4059bfae7f5b0e04ae549a
+ size: 2189
- path: output/reports/train/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/score_dict.json
hash: md5
- md5: 8869350701c8b1b367cdb1a33ab572d9
- size: 360
+ md5: cc368afafd0e89f04fb0ae89e64f5e0d
+ size: 716
params:
params.yaml:
attack:
@@ -315,32 +315,32 @@ stages:
outs:
- path: output/attacks/attack.pkl
hash: md5
- md5: b240c5f9c659967fe4768b5929a84905
+ md5: e250ed2062f12ee9f024bf1be33abf73
size: 1832
- path: output/reports/attack/default/adv_predictions.json
hash: md5
- md5: 36e7fcc5fe32df3a68a2603317e3d328
- size: 438
+ md5: 8cb93c0ec6db31d94298f831ac081c64
+ size: 700
- path: output/reports/attack/default/adv_probabilities.json
hash: md5
- md5: 36e7fcc5fe32df3a68a2603317e3d328
- size: 438
+ md5: 8cb93c0ec6db31d94298f831ac081c64
+ size: 700
- path: output/reports/attack/default/params.yaml
hash: md5
- md5: b300c684dc58fc23684ccefbb9f83265
- size: 5832
+ md5: 3aa13a2e1e66b911f66d9bd8a8823369
+ size: 5310
- path: output/reports/attack/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/attack/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/attack/default/score_dict.json
hash: md5
- md5: f8b8b80b2e8369f09e1f4730fcd9ba57
- size: 582
+ md5: 595fabb17f79dca7ef3d7799e6a43388
+ size: 1235
models:
cmd: bash other_data.sh +stage=train --config-name=model.yaml
deps:
@@ -448,75 +448,77 @@ stages:
outs:
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
compile_models:
cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file
output/train.csv
deps:
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
- path: output/reports/train/
hash: md5
- md5: 4bbc6640609fdcd2e3d8595678dc22c8.dir
- size: 42445285
- nfiles: 1672
+ md5: df8221c356532e382e7f6909027e1648.dir
+ size: 11786125
+ nfiles: 336
outs:
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
find_best_model@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_rbf --study_name=rbf --default_config model.yaml
+ --params_file best_rbf --study_name=rbf --default_config default.yaml --storage_name
+ sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 3092c0288833989d2e77d849993a2a40
- size: 360
+ md5: 7210f1655e71b637d09822e3faa1f0ff
+ size: 358
find_best_model@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_linear --study_name=linear --default_config model.yaml
+ --params_file best_linear --study_name=linear --default_config default.yaml
+ --storage_name sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/model/best_linear.yaml
hash: md5
@@ -524,26 +526,27 @@ stages:
size: 330
find_best_model@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_poly --study_name=poly --default_config model.yaml
+ --params_file best_poly --study_name=poly --default_config default.yaml --storage_name
+ sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 49c26d851f36ef980b4a5bb1dabfebd8
+ size: 370
attacks:
cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml
deps:
@@ -553,34 +556,34 @@ stages:
size: 330
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 49c26d851f36ef980b4a5bb1dabfebd8
+ size: 370
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 3092c0288833989d2e77d849993a2a40
- size: 360
+ md5: 7210f1655e71b637d09822e3faa1f0ff
+ size: 358
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: model.db
hash: md5
- md5: 081a4f2934142058dbe5674f8d087031
- size: 733184
+ md5: 155463edba880de94ed717294def04a8
+ size: 208896
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: attack.db
hash: md5
- md5: 380effd61d22da8bc2b0f655e67f1cf0
- size: 700416
+ md5: 37f5c17e7689935a334caf09c8aac40c
+ size: 315392
- path: logs/attacks/
hash: md5
- md5: e3d5880a8a34d62926f202472f635636.dir
- size: 7098648
+ md5: 18f2cba5502fa20600145eb551f2e64b.dir
+ size: 1695110
nfiles: 3
compile_attacks:
cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file
@@ -588,89 +591,92 @@ stages:
deps:
- path: attack.db
hash: md5
- md5: 380effd61d22da8bc2b0f655e67f1cf0
- size: 700416
+ md5: 37f5c17e7689935a334caf09c8aac40c
+ size: 315392
- path: logs/attacks/
hash: md5
- md5: e3d5880a8a34d62926f202472f635636.dir
- size: 7098648
+ md5: 18f2cba5502fa20600145eb551f2e64b.dir
+ size: 1695110
nfiles: 3
- path: output/reports/attack/
hash: md5
- md5: 9a8c30a61ea2025b38ad09a7bd1a8e82.dir
- size: 64940922
- nfiles: 4355
+ md5: b71df3c8f2374573d6170f3223aa9b9c.dir
+ size: 39783146
+ nfiles: 2169
outs:
- path: output/attack.csv
hash: md5
- md5: b0d1e2263515e400f6303c3afb0f5cfd
- size: 1545938
+ md5: 3ba52610fa5c0f042ceb92c3139f5596
+ size: 983830
find_best_attack@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_linear --study_name=best_linear --default_config attack.yaml
+ --params_file best_linear --study_name=best_linear --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 380effd61d22da8bc2b0f655e67f1cf0
- size: 700416
+ md5: 37f5c17e7689935a334caf09c8aac40c
+ size: 315392
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: f048059aaa0e383f9c5ae9c085927588
- size: 231
+ md5: d154a851ce6ec4fd55b11dbc50bea318
+ size: 249
find_best_attack@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_rbf --study_name=best_rbf --default_config attack.yaml
+ --params_file best_rbf --study_name=best_rbf --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 380effd61d22da8bc2b0f655e67f1cf0
- size: 700416
+ md5: 37f5c17e7689935a334caf09c8aac40c
+ size: 315392
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: 936f60710cd2fba6d1b3584accc94943
- size: 246
+ md5: c68a838c04899ee68e0072f640af2f21
+ size: 248
find_best_attack@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_poly --study_name=best_poly --default_config attack.yaml
+ --params_file best_poly --study_name=best_poly --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 380effd61d22da8bc2b0f655e67f1cf0
- size: 700416
+ md5: 37f5c17e7689935a334caf09c8aac40c
+ size: 315392
- path: logs/models/
hash: md5
- md5: ab01d57634e90f21b3b9a25ff62da3ca.dir
- size: 359561
+ md5: 3bdfd76f9298422ef6c1b55ef111802c.dir
+ size: 202845
nfiles: 3
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
outs:
- path: conf/attack/best_poly.yaml
hash: md5
- md5: 26b55aad33b06e46b07904b00c5cb236
- size: 228
+ md5: 33974287420fdf63175bb6e0212a1e9b
+ size: 251
other_data_train@kdd_nsl:
cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml
deps:
@@ -706,93 +712,94 @@ stages:
deps:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: f048059aaa0e383f9c5ae9c085927588
- size: 231
+ md5: d154a851ce6ec4fd55b11dbc50bea318
+ size: 249
- path: conf/attack/best_poly.yaml
hash: md5
- md5: 26b55aad33b06e46b07904b00c5cb236
- size: 228
+ md5: 33974287420fdf63175bb6e0212a1e9b
+ size: 251
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: 936f60710cd2fba6d1b3584accc94943
- size: 246
+ md5: c68a838c04899ee68e0072f640af2f21
+ size: 248
- path: conf/model/best_linear.yaml
hash: md5
md5: e4ae7059114d8724d4947e952145d4fe
size: 330
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 49c26d851f36ef980b4a5bb1dabfebd8
+ size: 370
- path: conf/model/best_rbf.yaml
hash: md5
- md5: 3092c0288833989d2e77d849993a2a40
- size: 360
+ md5: 7210f1655e71b637d09822e3faa1f0ff
+ size: 358
- path: output/attacks/
hash: md5
- md5: 4551130dd81dfa20db94f2888d04675c.dir
- size: 725472
- nfiles: 396
- - path: output/models/
- hash: md5
- md5: a738ec4b74e79472cfce860968cba882.dir
- size: 2390233
- nfiles: 279
+ md5: fa1bb6df926ae12f22c2651ab77c3a86.dir
+ size: 4070312
+ nfiles: 241
outs:
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: ce54cebd30fd5088597f7db85eab1754
- size: 114012
+ md5: d06f8ccd3410c566773776bee2933753
+ size: 785930
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: 82ff291d66e8f067a223cfcf1f117f63
- size: 113995
+ md5: 7289fa5bcd5712d52801b76b36159d80
+ size: 785913
- path: retrain/
hash: md5
- md5: 5f501f7245ed485c6d1d0e5ac44297a3.dir
- size: 174463
+ md5: 9f340584668054abbc4cda10df68f660.dir
+ size: 172962
nfiles: 12
plots:
cmd: python plots.py
deps:
- path: output/attack.csv
hash: md5
- md5: b0d1e2263515e400f6303c3afb0f5cfd
- size: 1545938
+ md5: 3ba52610fa5c0f042ceb92c3139f5596
+ size: 983830
- path: output/train.csv
hash: md5
- md5: c740b7ccc67c3f38a04446ad0afe5ce6
- size: 611967
+ md5: 4508b28e78d9b4d38dd60a10b54798dc
+ size: 164189
+ - path: plots.py
+ hash: md5
+ md5: 6f0729bdca6bafc3c92faca71dc8c97e
+ size: 10164
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: ce54cebd30fd5088597f7db85eab1754
- size: 114012
+ md5: d06f8ccd3410c566773776bee2933753
+ size: 785930
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: 82ff291d66e8f067a223cfcf1f117f63
- size: 113995
+ md5: 7289fa5bcd5712d52801b76b36159d80
+ size: 785913
outs:
- - path: plots/accuracy_vs_attack_parameters.pdf
+ - path: plots/accuracy_vs_attack_parameters.eps
hash: md5
- md5: 8adf0a397611373445d6d4537acd494d
- size: 16715
- - path: plots/confidence_vs_attack_parameters.pdf
+ md5: 8174380cd1e3153249aa7f4095905d82
+ size: 39189
+ - path: plots/confidence_vs_attack_parameters.eps
hash: md5
- md5: de3ef58684597cc5e71a4f6062128fe7
- size: 18202
- - path: plots/retrain_accuracy.pdf
+ md5: e612551ce45bfb4fbd134c0058ae038d
+ size: 41785
+ - path: plots/retrain_accuracy.eps
hash: md5
- md5: 577e89d46eb6f2446d0a3ed83b4f9e19
- size: 13913
- - path: plots/retrain_confidence_vs_attack_parameters.pdf
+ md5: 5d0161b9c44e397e167e200738709fe3
+ size: 30829
+ - path: plots/retrain_confidence_vs_attack_parameters.eps
hash: md5
- md5: 4f7b2f8e2a7a4552816389bd1dcaa074
- size: 18181
- - path: plots/retrain_time.pdf
+ md5: 76c457aeabd26983a5fc3a129e942c0a
+ size: 42149
+ - path: plots/retrain_time.eps
hash: md5
- md5: 7ad5725d3c3033b796ece976881d852d
- size: 12896
- - path: plots/train_time_vs_attack_parameters.pdf
+ md5: 461075c4b7f2f693c22f96e34db026ca
+ size: 28368
+ - path: plots/train_time_vs_attack_parameters.eps
hash: md5
- md5: c2436157654bd664dc06528fcbfc834a
- size: 17032
+ md5: 59de7016df4a8380776a7ea0dd160359
+ size: 39247
+ move_files:
+ cmd: cp -r plots/* ~/KDD-Paper-EAI-AISEC/kdd-nsl/
diff --git a/examples/security/kdd-nsl/dvc.yaml b/examples/security/kdd-nsl/dvc.yaml
index 04164939..b3ea885c 100644
--- a/examples/security/kdd-nsl/dvc.yaml
+++ b/examples/security/kdd-nsl/dvc.yaml
@@ -70,7 +70,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config model.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config default.yaml --storage_name sqlite:///model.db
outs:
- conf/model/best_${item}.yaml
deps:
@@ -108,7 +108,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize
outs:
- conf/attack/best_${item}.yaml
deps:
@@ -118,7 +118,6 @@ stages:
retrain:
cmd : python retrain.py
deps:
- - ${files.directory}/models/
- ${files.directory}/attacks/
- conf/attack/best_linear.yaml
- conf/attack/best_rbf.yaml
@@ -134,18 +133,22 @@ stages:
plots:
cmd : python plots.py
deps :
+ - plots.py
- plots/after_retrain_confidence.csv
- output/attack.csv
- plots/before_retrain_confidence.csv
- output/train.csv
plots :
- - plots/accuracy_vs_attack_parameters.pdf
- # - plots/accuracy_vs_features.pdf
- # - plots/accuracy_vs_samples.pdf
- - plots/confidence_vs_attack_parameters.pdf
- - plots/train_time_vs_attack_parameters.pdf
- # - plots/train_time_vs_features.pdf
- # - plots/train_time_vs_samples.pdf
- - plots/retrain_accuracy.pdf
- - plots/retrain_confidence_vs_attack_parameters.pdf
- - plots/retrain_time.pdf
+ - plots/accuracy_vs_attack_parameters.eps
+ # - plots/accuracy_vs_features.eps
+ # - plots/accuracy_vs_samples.eps
+ - plots/confidence_vs_attack_parameters.eps
+ - plots/train_time_vs_attack_parameters.eps
+ # - plots/train_time_vs_features.eps
+ # - plots/train_time_vs_samples.eps
+ - plots/retrain_accuracy.eps
+ - plots/retrain_confidence_vs_attack_parameters.eps
+ - plots/retrain_time.eps
+ move_files:
+ cmd: >-
+ cp -r plots/* ~/KDD-Paper-EAI-AISEC/kdd-nsl/
diff --git a/examples/security/kdd-nsl/plots.py b/examples/security/kdd-nsl/plots.py
index 06375d98..b5499185 100644
--- a/examples/security/kdd-nsl/plots.py
+++ b/examples/security/kdd-nsl/plots.py
@@ -18,28 +18,16 @@
# else:
# results = parse_results("reports/model_queue/")
results = pd.read_csv("output/train.csv")
-# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"]
-results["Kernel"] = results["model.init.kwargs.kernel"].copy()
-# results["Features"] = results["data.generate.kwargs.n_features"].copy()
-results["Samples"] = results["data.sample.train_size"].copy()
-# results["input_size"] = input_size
-# sample_list = results["data.generate.kwargs.n_samples"].unique()
-# feature_list = results["data.generate.kwargs.n_features"].unique()
-kernel_list = results["model.init.kwargs.kernel"].unique()
+results["Kernel"] = results["model.init.kernel"].copy()
if "Unnamed: 0" in results.columns:
del results["Unnamed: 0"]
for col in results.columns:
if col == "data.name" and isinstance(results[col][0], list):
results[col] = results[col].apply(lambda x: x[0])
-results = results[results["model.init.kwargs.kernel"] != "sigmoid"]
+results = results[results["model.init.kernel"] != "sigmoid"]
attack_results = pd.read_csv("output/attack.csv")
-attack_results["Kernel"] = attack_results["model.init.kwargs.kernel"].copy()
-# attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy()
-# attack_results["Samples"] = attack_results["data.sample.train_size"].copy()
-# sample_list = attack_results["data.generate.kwargs.n_samples"].unique()
-# feature_list = attack_results["data.generate.kwargs.n_features"].unique()
-kernel_list = attack_results["model.init.kwargs.kernel"].unique()
+attack_results["Kernel"] = attack_results["model.init.kernel"].copy()
if "Unnamed: 0" in attack_results.columns:
del attack_results["Unnamed: 0"]
for col in attack_results.columns:
@@ -47,75 +35,26 @@
attack_results[col] = attack_results[col].apply(lambda x: x[0])
-# graph1 = sns.lineplot(
-# x="data.sample.train_size",
-# y="accuracy",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph1.legend(labels=["Linear", "RBF", "Poly"])
-# graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
-# graph1.set_xlabel("Number of Samples")
-# graph1.set_ylabel("Accuracy")
-# graph1.set_xscale("log")
-# graph1.get_figure().tight_layout()
-# graph1.get_figure().savefig("plots/accuracy_vs_samples.pdf")
-# plt.gcf().clear()
-
-# graph2 = sns.lineplot(
-# x="data.generate.kwargs.n_features",
-# y="accuracy",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph2.set_xlabel("Number of Features")
-# graph2.set_ylabel("Accuracy")
-# graph2.set_xscale("log")
-# graph2.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
-# graph2.get_figure().tight_layout()
-# graph2.get_figure().savefig("plots/accuracy_vs_features.pdf")
-# plt.gcf().clear()
-
-# results["train_time"] = (
-# results["train_time"]
-# * results["data.sample.train_size"]
-# * results["data.generate.kwargs.n_samples"]
-# )
-# graph3 = sns.lineplot(
-# x="data.generate.kwargs.n_features",
-# y="train_time",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph3.set_xlabel("Number of Features")
-# graph3.set_ylabel("Training Time")
-# graph3.set(yscale="log", xscale="log")
-# graph3.legend(title="Kernel")
-# graph3.get_figure().tight_layout()
-# graph3.get_figure().savefig("plots/train_time_vs_features.pdf")
-# plt.gcf().clear()
-
-# graph4 = sns.lineplot(
-# x="data.sample.train_size",
-# y="train_time",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph4.set_xlabel("Number of Samples")
-# graph4.set_ylabel("Training Time")
-# graph4.set(yscale="log", xscale="log")
-# graph4.legend(title="Kernel")
-# graph4.get_figure().tight_layout()
-# graph4.get_figure().savefig("plots/train_time_vs_samples.pdf")
-# plt.gcf().clear()
+graph4 = sns.lineplot(
+ x="data.sample.train_size",
+ y="train_time",
+ data=results,
+ style="Kernel",
+ style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
+)
+graph4.set_xlabel("Number of Samples")
+graph4.set_ylabel("Training Time")
+graph4.set(yscale="log", xscale="log", xlim=(10, 1e6))
+graph4.legend(title="Kernel")
+graph4.get_figure().tight_layout()
+graph4.get_figure().savefig("plots/train_time_vs_samples.eps")
+plt.gcf().clear()
fig, ax = plt.subplots(2, 2)
graph5 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -123,20 +62,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy")
graph6 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="accuracy",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy")
graph7 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -144,10 +87,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy")
graph8 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -155,16 +100,18 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy")
graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout()
-fig.savefig("plots/accuracy_vs_attack_parameters.pdf")
+fig.savefig("plots/accuracy_vs_attack_parameters.eps")
plt.gcf().clear()
fig, ax = plt.subplots(2, 2)
graph9 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -172,20 +119,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time")
graph10 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="adv_fit_time",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time")
graph11 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -193,10 +144,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time")
graph12 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -204,11 +157,13 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/train_time_vs_attack_parameters.pdf")
+fig.savefig("plots/train_time_vs_attack_parameters.eps")
plt.gcf().clear()
retrain_df = pd.DataFrame()
@@ -231,6 +186,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -240,12 +197,14 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
retrain.set_ylabel("Accuracy")
retrain.get_figure().tight_layout()
-retrain.get_figure().savefig("plots/retrain_accuracy.pdf")
+retrain.get_figure().savefig("plots/retrain_accuracy.eps")
plt.gcf().clear()
retrain_df["ben_time"] = retrain_df["ben_time"] * retrain_df["train_size"] * 10
@@ -256,6 +215,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -265,13 +226,15 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
retrain.set_ylabel("Time")
retrain.set_yscale("log")
retrain.get_figure().tight_layout()
-retrain.get_figure().savefig("plots/retrain_time.pdf")
+retrain.get_figure().savefig("plots/retrain_time.eps")
plt.gcf().clear()
confidence_df = pd.read_csv("plots/before_retrain_confidence.csv")
@@ -285,6 +248,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -295,6 +260,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -306,6 +273,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -317,11 +286,13 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/confidence_vs_attack_parameters.pdf")
+fig.savefig("plots/confidence_vs_attack_parameters.eps")
plt.gcf().clear()
confdence_df = pd.read_csv("plots/after_retrain_confidence.csv")
@@ -336,6 +307,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -346,6 +319,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -357,6 +332,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -368,9 +345,11 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/retrain_confidence_vs_attack_parameters.pdf")
+fig.savefig("plots/retrain_confidence_vs_attack_parameters.eps")
plt.gcf().clear()
diff --git a/examples/security/kdd-nsl/plots/.gitignore b/examples/security/kdd-nsl/plots/.gitignore
index 642f14d4..f09089fa 100644
--- a/examples/security/kdd-nsl/plots/.gitignore
+++ b/examples/security/kdd-nsl/plots/.gitignore
@@ -4,9 +4,3 @@
/retrain_accuracy.eps
/retrain_confidence_vs_attack_parameters.eps
/retrain_time.eps
-/accuracy_vs_attack_parameters.pdf
-/confidence_vs_attack_parameters.pdf
-/train_time_vs_attack_parameters.pdf
-/retrain_accuracy.pdf
-/retrain_confidence_vs_attack_parameters.pdf
-/retrain_time.pdf
diff --git a/examples/security/kdd-nsl/plots/train_time_vs_samples.eps b/examples/security/kdd-nsl/plots/train_time_vs_samples.eps
new file mode 100644
index 00000000..8646b377
--- /dev/null
+++ b/examples/security/kdd-nsl/plots/train_time_vs_samples.eps
@@ -0,0 +1,1373 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Title: train_time_vs_samples.eps
+%%Creator: Matplotlib v3.7.2, https://matplotlib.org/
+%%CreationDate: Tue Jul 16 16:20:51 2024
+%%Orientation: portrait
+%%BoundingBox: 75 223 537 569
+%%HiResBoundingBox: 75.600000 223.200000 536.400000 568.800000
+%%EndComments
+%%BeginProlog
+/mpldict 11 dict def
+mpldict begin
+/_d { bind def } bind def
+/m { moveto } _d
+/l { lineto } _d
+/r { rlineto } _d
+/c { curveto } _d
+/cl { closepath } _d
+/ce { closepath eofill } _d
+/box {
+ m
+ 1 index 0 r
+ 0 exch r
+ neg 0 r
+ cl
+ } _d
+/clipbox {
+ box
+ clip
+ newpath
+ } _d
+/sc { setcachedevice } _d
+%!PS-Adobe-3.0 Resource-Font
+%%Creator: Converted from TrueType to Type 3 by Matplotlib.
+10 dict begin
+/FontName /DejaVuSerif def
+/PaintType 0 def
+/FontMatrix [0.00048828125 0 0 0.00048828125 0 0] def
+/FontBBox [-1576 -710 4312 2272] def
+/FontType 3 def
+/Encoding [/minus /space /period /zero /one /two /three /four /five /six /eight /K /N /S /T /multiply /a /b /e /f /g /i /l /m /n /o /p /r /s /u /y] def
+/CharStrings 32 dict dup begin
+/.notdef 0 def
+/minus{1716 0 217 561 1499 723 sc
+217 723 m
+1499 723 l
+1499 561 l
+217 561 l
+217 723 l
+
+ce} _d
+/space{651 0 0 0 0 0 sc
+ce} _d
+/period{651 0 193 -29 459 238 sc
+193 104 m
+193 141 206 173 231 199 c
+256 225 288 238 326 238 c
+363 238 394 225 420 199 c
+446 173 459 141 459 104 c
+459 67 446 36 420 10 c
+394 -16 363 -29 326 -29 c
+288 -29 256 -16 231 9 c
+206 35 193 67 193 104 c
+
+ce} _d
+/zero{1303 0 135 -29 1167 1520 sc
+651 70 m
+753 70 829 126 880 238 c
+931 350 956 519 956 745 c
+956 972 931 1141 880 1253 c
+829 1365 753 1421 651 1421 c
+549 1421 473 1365 422 1253 c
+371 1141 346 972 346 745 c
+346 519 371 350 422 238 c
+473 126 549 70 651 70 c
+
+651 -29 m
+489 -29 362 39 271 175 c
+180 311 135 501 135 745 c
+135 990 180 1180 271 1316 c
+362 1452 489 1520 651 1520 c
+814 1520 940 1452 1031 1316 c
+1122 1180 1167 990 1167 745 c
+1167 501 1122 311 1031 175 c
+940 39 814 -29 651 -29 c
+
+ce} _d
+/one{1303 0 250 0 1012 1520 sc
+291 0 m
+291 106 l
+551 106 l
+551 1348 l
+250 1153 l
+250 1284 l
+614 1520 l
+752 1520 l
+752 106 l
+1012 106 l
+1012 0 l
+291 0 l
+
+ce} _d
+/two{1303 0 139 0 1102 1520 sc
+262 1137 m
+150 1137 l
+150 1403 l
+221 1441 293 1470 365 1490 c
+438 1510 509 1520 578 1520 c
+733 1520 856 1482 946 1407 c
+1036 1332 1081 1229 1081 1100 c
+1081 954 979 779 775 576 c
+759 561 747 549 739 541 c
+362 164 l
+985 164 l
+985 348 l
+1102 348 l
+1102 0 l
+139 0 l
+139 109 l
+592 561 l
+692 661 763 753 806 836 c
+849 920 870 1008 870 1100 c
+870 1201 844 1279 791 1336 c
+739 1393 667 1421 575 1421 c
+480 1421 406 1397 354 1350 c
+302 1303 271 1232 262 1137 c
+
+ce} _d
+/three{1303 0 156 -29 1151 1520 sc
+199 1430 m
+277 1459 352 1482 423 1497 c
+495 1512 562 1520 625 1520 c
+771 1520 885 1488 967 1425 c
+1049 1362 1090 1275 1090 1163 c
+1090 1073 1062 998 1005 937 c
+948 877 868 836 764 815 c
+887 798 982 753 1049 681 c
+1117 610 1151 517 1151 403 c
+1151 264 1104 157 1010 82 c
+917 8 782 -29 606 -29 c
+528 -29 452 -21 377 -4 c
+303 13 229 38 156 72 c
+156 362 l
+268 362 l
+275 266 307 193 365 144 c
+423 95 505 70 610 70 c
+712 70 792 99 851 158 c
+910 217 940 298 940 401 c
+940 518 910 607 849 667 c
+788 728 699 758 582 758 c
+487 758 l
+487 860 l
+537 860 l
+654 860 741 884 799 932 c
+858 981 887 1054 887 1151 c
+887 1238 863 1305 815 1351 c
+767 1398 698 1421 608 1421 c
+518 1421 448 1400 398 1357 c
+349 1314 320 1251 311 1167 c
+199 1167 l
+199 1430 l
+
+ce} _d
+/four{1303 0 63 0 1200 1520 sc
+715 506 m
+715 1300 l
+205 506 l
+715 506 l
+
+1155 0 m
+475 0 l
+475 106 l
+715 106 l
+715 399 l
+63 399 l
+63 508 l
+717 1520 l
+915 1520 l
+915 506 l
+1200 506 l
+1200 399 l
+915 399 l
+915 106 l
+1155 106 l
+1155 0 l
+
+ce} _d
+/five{1303 0 174 -29 1145 1493 sc
+1030 1493 m
+1030 1329 l
+346 1329 l
+346 901 l
+381 925 421 943 467 955 c
+514 967 566 973 623 973 c
+784 973 912 928 1005 839 c
+1098 750 1145 628 1145 473 c
+1145 315 1098 192 1003 103 c
+909 15 777 -29 606 -29 c
+537 -29 467 -21 395 -4 c
+323 13 249 38 174 72 c
+174 362 l
+287 362 l
+293 267 323 195 377 145 c
+432 95 508 70 606 70 c
+711 70 792 105 849 174 c
+906 243 934 343 934 473 c
+934 602 906 701 849 770 c
+793 839 712 874 606 874 c
+546 874 493 863 447 842 c
+402 821 361 788 326 743 c
+240 743 l
+240 1493 l
+1030 1493 l
+
+ce} _d
+/six{1303 0 137 -29 1174 1520 sc
+670 70 m
+764 70 836 104 887 173 c
+938 242 963 342 963 471 c
+963 600 938 699 887 768 c
+836 837 764 872 670 872 c
+575 872 502 839 452 772 c
+402 705 377 609 377 483 c
+377 350 402 248 453 177 c
+504 106 576 70 670 70 c
+
+344 822 m
+389 872 441 909 498 934 c
+555 959 620 971 692 971 c
+841 971 958 926 1044 837 c
+1131 748 1174 626 1174 471 c
+1174 320 1127 198 1034 107 c
+941 16 817 -29 662 -29 c
+493 -29 364 34 273 159 c
+182 285 137 465 137 698 c
+137 959 191 1162 298 1305 c
+405 1448 557 1520 752 1520 c
+805 1520 860 1515 918 1505 c
+976 1495 1035 1480 1096 1460 c
+1096 1214 l
+983 1214 l
+975 1281 949 1333 906 1368 c
+863 1403 804 1421 731 1421 c
+602 1421 505 1372 442 1274 c
+379 1176 346 1025 344 822 c
+
+ce} _d
+/eight{1303 0 137 -29 1165 1520 sc
+954 408 m
+954 515 927 597 874 656 c
+821 715 747 745 651 745 c
+555 745 480 715 427 656 c
+374 597 348 515 348 408 c
+348 301 374 217 427 158 c
+480 99 555 70 651 70 c
+747 70 821 99 874 158 c
+927 217 954 301 954 408 c
+
+913 1133 m
+913 1224 890 1294 844 1345 c
+798 1396 734 1421 651 1421 c
+569 1421 505 1396 458 1345 c
+412 1294 389 1224 389 1133 c
+389 1042 412 971 458 920 c
+505 869 569 844 651 844 c
+734 844 798 869 844 920 c
+890 971 913 1042 913 1133 c
+
+805 795 m
+918 780 1007 738 1070 669 c
+1133 601 1165 514 1165 408 c
+1165 268 1121 160 1032 84 c
+943 9 816 -29 651 -29 c
+486 -29 359 9 270 84 c
+181 160 137 268 137 408 c
+137 514 169 601 232 669 c
+295 738 384 780 498 795 c
+397 813 320 851 266 909 c
+213 968 186 1042 186 1133 c
+186 1253 227 1347 310 1416 c
+393 1485 506 1520 651 1520 c
+796 1520 909 1485 992 1416 c
+1075 1347 1116 1253 1116 1133 c
+1116 1042 1089 968 1035 909 c
+982 851 905 813 805 795 c
+
+ce} _d
+/K{1530 0 113 0 1561 1493 sc
+113 0 m
+113 106 l
+303 106 l
+303 1386 l
+113 1386 l
+113 1493 l
+696 1493 l
+696 1386 l
+506 1386 l
+506 821 l
+1149 1386 l
+987 1386 l
+987 1493 l
+1483 1493 l
+1483 1386 l
+1315 1386 l
+674 823 l
+1391 106 l
+1561 106 l
+1561 0 l
+1214 0 l
+506 709 l
+506 106 l
+696 106 l
+696 0 l
+113 0 l
+
+ce} _d
+/N{1792 0 100 -29 1702 1493 sc
+100 0 m
+100 106 l
+301 106 l
+301 1386 l
+100 1386 l
+100 1493 l
+483 1493 l
+1378 315 l
+1378 1386 l
+1178 1386 l
+1178 1493 l
+1702 1493 l
+1702 1386 l
+1501 1386 l
+1501 -29 l
+1380 -29 l
+424 1229 l
+424 106 l
+625 106 l
+625 0 l
+100 0 l
+
+ce} _d
+/S{1403 0 172 -29 1253 1520 sc
+190 72 m
+190 412 l
+305 411 l
+308 298 341 214 403 159 c
+466 105 561 78 688 78 c
+807 78 897 101 959 148 c
+1022 195 1053 264 1053 354 c
+1053 426 1034 481 996 520 c
+959 559 879 596 758 633 c
+561 692 l
+418 735 318 789 259 854 c
+201 919 172 1007 172 1120 c
+172 1247 217 1345 307 1415 c
+397 1485 523 1520 686 1520 c
+755 1520 831 1512 914 1497 c
+997 1482 1085 1461 1178 1432 c
+1178 1114 l
+1065 1114 l
+1054 1219 1018 1295 959 1342 c
+900 1389 811 1413 690 1413 c
+585 1413 504 1391 449 1348 c
+394 1305 367 1243 367 1161 c
+367 1090 388 1034 429 993 c
+470 952 558 912 692 872 c
+877 817 l
+1012 776 1109 724 1166 661 c
+1224 598 1253 514 1253 408 c
+1253 263 1207 154 1114 81 c
+1021 8 883 -29 700 -29 c
+618 -29 534 -21 449 -4 c
+364 13 278 38 190 72 c
+
+ce} _d
+/T{1366 0 20 0 1346 1493 sc
+391 0 m
+391 106 l
+582 106 l
+582 1374 l
+143 1374 l
+143 1141 l
+20 1141 l
+20 1493 l
+1346 1493 l
+1346 1141 l
+1223 1141 l
+1223 1374 l
+784 1374 l
+784 106 l
+975 106 l
+975 0 l
+391 0 l
+
+ce} _d
+/multiply{1716 0 283 68 1434 1217 sc
+1434 1104 m
+971 641 l
+1434 180 l
+1319 68 l
+858 528 l
+397 68 l
+283 180 l
+743 641 l
+283 1104 l
+397 1217 l
+858 756 l
+1319 1217 l
+1434 1104 l
+
+ce} _d
+/a{1221 0 102 -29 1163 1092 sc
+815 334 m
+815 559 l
+578 559 l
+487 559 419 539 374 500 c
+329 461 307 400 307 319 c
+307 245 330 186 375 143 c
+420 100 482 78 559 78 c
+636 78 697 102 744 149 c
+791 196 815 258 815 334 c
+
+999 664 m
+999 106 l
+1163 106 l
+1163 0 l
+815 0 l
+815 115 l
+774 66 727 29 674 6 c
+621 -17 558 -29 487 -29 c
+369 -29 275 2 206 65 c
+137 128 102 212 102 319 c
+102 429 142 514 221 575 c
+300 636 412 666 557 666 c
+815 666 l
+815 739 l
+815 820 790 882 741 926 c
+692 971 624 993 535 993 c
+462 993 403 976 360 943 c
+317 910 290 860 279 795 c
+184 795 l
+184 1010 l
+248 1037 310 1058 370 1071 c
+431 1085 490 1092 547 1092 c
+694 1092 806 1055 883 982 c
+960 909 999 803 999 664 c
+
+ce} _d
+/b{1311 0 59 -29 1208 1556 sc
+236 106 m
+236 1450 l
+59 1450 l
+59 1556 l
+420 1556 l
+420 897 l
+456 964 502 1013 557 1044 c
+613 1076 682 1092 764 1092 c
+895 1092 1001 1040 1084 937 c
+1167 834 1208 699 1208 532 c
+1208 365 1167 230 1084 126 c
+1001 23 895 -29 764 -29 c
+682 -29 613 -13 557 18 c
+502 50 456 99 420 166 c
+420 0 l
+59 0 l
+59 106 l
+236 106 l
+
+420 479 m
+420 351 444 253 493 186 c
+542 119 614 86 707 86 c
+801 86 872 124 920 199 c
+969 274 993 385 993 532 c
+993 679 969 790 920 865 c
+872 940 801 977 707 977 c
+614 977 542 943 493 876 c
+444 809 420 711 420 584 c
+420 479 l
+
+ce} _d
+/e{1212 0 102 -29 1110 1092 sc
+1110 512 m
+317 512 l
+317 504 l
+317 361 344 252 398 179 c
+452 106 532 70 637 70 c
+718 70 784 91 835 133 c
+887 176 923 239 944 322 c
+1092 322 l
+1063 205 1008 118 929 59 c
+850 0 747 -29 618 -29 c
+463 -29 338 22 243 124 c
+149 227 102 363 102 532 c
+102 700 148 835 241 938 c
+334 1041 455 1092 606 1092 c
+767 1092 890 1042 976 943 c
+1062 844 1107 701 1110 512 c
+
+893 618 m
+889 742 863 835 814 898 c
+766 961 697 993 606 993 c
+521 993 455 961 406 898 c
+357 835 328 741 317 618 c
+893 618 l
+
+ce} _d
+/f{758 0 74 0 881 1556 sc
+881 1305 m
+784 1305 l
+783 1355 769 1393 741 1419 c
+714 1445 674 1458 621 1458 c
+552 1458 504 1439 476 1401 c
+448 1364 434 1297 434 1200 c
+434 1063 l
+731 1063 l
+731 956 l
+434 956 l
+434 106 l
+670 106 l
+670 0 l
+74 0 l
+74 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+250 1063 l
+250 1196 l
+250 1315 281 1404 342 1465 c
+404 1526 495 1556 614 1556 c
+659 1556 703 1552 748 1544 c
+793 1536 837 1524 881 1507 c
+881 1305 l
+
+ce} _d
+/g{1311 0 102 -455 1251 1092 sc
+1075 956 m
+1075 23 l
+1075 -130 1033 -247 949 -330 c
+865 -413 745 -455 590 -455 c
+520 -455 453 -449 389 -436 c
+325 -423 264 -404 205 -379 c
+205 -156 l
+301 -156 l
+313 -225 341 -276 386 -308 c
+431 -340 495 -356 578 -356 c
+686 -356 765 -325 815 -264 c
+866 -203 891 -108 891 23 c
+891 166 l
+855 99 809 50 753 18 c
+698 -13 629 -29 547 -29 c
+416 -29 309 23 226 126 c
+143 230 102 365 102 532 c
+102 699 143 834 226 937 c
+309 1040 416 1092 547 1092 c
+629 1092 698 1076 753 1044 c
+809 1013 855 964 891 897 c
+891 1063 l
+1251 1063 l
+1251 956 l
+1075 956 l
+
+891 584 m
+891 711 866 809 817 876 c
+768 943 697 977 604 977 c
+509 977 438 940 389 865 c
+341 790 317 679 317 532 c
+317 385 341 274 389 199 c
+438 124 509 86 604 86 c
+697 86 768 119 817 186 c
+866 253 891 351 891 479 c
+891 584 l
+
+ce} _d
+/i{655 0 74 0 608 1507 sc
+199 1393 m
+199 1424 210 1450 232 1473 c
+255 1496 282 1507 313 1507 c
+344 1507 370 1496 392 1473 c
+415 1450 426 1424 426 1393 c
+426 1362 415 1335 393 1313 c
+371 1291 344 1280 313 1280 c
+282 1280 255 1291 232 1313 c
+210 1335 199 1362 199 1393 c
+
+434 106 m
+608 106 l
+608 0 l
+74 0 l
+74 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+434 1063 l
+434 106 l
+
+ce} _d
+/l{655 0 59 0 594 1556 sc
+420 106 m
+594 106 l
+594 0 l
+59 0 l
+59 106 l
+236 106 l
+236 1450 l
+59 1450 l
+59 1556 l
+420 1556 l
+420 106 l
+
+ce} _d
+/m{1942 0 74 0 1886 1092 sc
+1061 856 m
+1096 934 1141 993 1196 1032 c
+1251 1072 1316 1092 1389 1092 c
+1500 1092 1583 1057 1638 988 c
+1693 919 1720 815 1720 676 c
+1720 106 l
+1886 106 l
+1886 0 l
+1376 0 l
+1376 106 l
+1536 106 l
+1536 655 l
+1536 764 1520 841 1488 887 c
+1456 933 1403 956 1329 956 c
+1247 956 1184 925 1141 863 c
+1098 801 1077 711 1077 592 c
+1077 106 l
+1237 106 l
+1237 0 l
+733 0 l
+733 106 l
+893 106 l
+893 662 l
+893 768 877 843 845 888 c
+813 933 760 956 686 956 c
+604 956 541 925 498 863 c
+455 801 434 711 434 592 c
+434 106 l
+594 106 l
+594 0 l
+84 0 l
+84 106 l
+250 106 l
+250 958 l
+74 958 l
+74 1063 l
+434 1063 l
+434 874 l
+468 945 511 999 564 1036 c
+617 1073 676 1092 743 1092 c
+826 1092 895 1071 950 1030 c
+1005 989 1042 931 1061 856 c
+
+ce} _d
+/n{1319 0 74 0 1262 1092 sc
+84 0 m
+84 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+434 1063 l
+434 874 l
+468 946 512 1000 566 1037 c
+621 1074 684 1092 756 1092 c
+873 1092 960 1058 1015 991 c
+1070 924 1098 819 1098 676 c
+1098 106 l
+1262 106 l
+1262 0 l
+754 0 l
+754 106 l
+913 106 l
+913 618 l
+913 748 897 837 865 885 c
+833 934 777 958 696 958 c
+611 958 546 927 501 864 c
+456 802 434 711 434 592 c
+434 106 l
+594 106 l
+594 0 l
+84 0 l
+
+ce} _d
+/o{1233 0 102 -29 1130 1092 sc
+616 70 m
+715 70 789 109 839 187 c
+890 265 915 380 915 532 c
+915 684 890 799 839 876 c
+789 954 715 993 616 993 c
+517 993 443 954 392 876 c
+342 799 317 684 317 532 c
+317 380 342 265 393 187 c
+444 109 518 70 616 70 c
+
+616 -29 m
+461 -29 337 22 243 124 c
+149 227 102 363 102 532 c
+102 701 149 837 242 939 c
+336 1041 461 1092 616 1092 c
+771 1092 896 1041 989 939 c
+1083 837 1130 701 1130 532 c
+1130 363 1083 227 989 124 c
+896 22 771 -29 616 -29 c
+
+ce} _d
+/p{1311 0 59 -426 1208 1092 sc
+420 584 m
+420 479 l
+420 351 444 253 493 186 c
+542 119 614 86 707 86 c
+801 86 872 124 920 199 c
+969 274 993 385 993 532 c
+993 679 969 790 920 865 c
+872 940 801 977 707 977 c
+614 977 542 943 493 876 c
+444 809 420 711 420 584 c
+
+236 956 m
+59 956 l
+59 1063 l
+420 1063 l
+420 897 l
+456 964 502 1013 557 1044 c
+613 1076 682 1092 764 1092 c
+895 1092 1001 1040 1084 937 c
+1167 834 1208 699 1208 532 c
+1208 365 1167 230 1084 126 c
+1001 23 895 -29 764 -29 c
+682 -29 613 -13 557 18 c
+502 50 456 99 420 166 c
+420 -319 l
+594 -319 l
+594 -426 l
+59 -426 l
+59 -319 l
+236 -319 l
+236 956 l
+
+ce} _d
+/r{979 0 74 0 979 1092 sc
+979 1065 m
+979 799 l
+873 799 l
+870 852 855 891 829 917 c
+803 943 765 956 715 956 c
+624 956 555 925 506 862 c
+458 799 434 709 434 592 c
+434 106 l
+647 106 l
+647 0 l
+84 0 l
+84 106 l
+250 106 l
+250 958 l
+74 958 l
+74 1063 l
+434 1063 l
+434 874 l
+470 948 516 1003 573 1038 c
+630 1074 699 1092 780 1092 c
+810 1092 841 1090 874 1085 c
+907 1080 942 1074 979 1065 c
+
+ce} _d
+/s{1051 0 115 -29 946 1092 sc
+115 59 m
+115 307 l
+221 307 l
+224 228 248 168 295 129 c
+342 90 412 70 504 70 c
+587 70 650 85 693 116 c
+736 147 758 193 758 252 c
+758 299 742 336 710 365 c
+679 394 612 424 510 457 c
+377 502 l
+286 531 219 568 178 612 c
+137 656 117 712 117 780 c
+117 877 153 954 224 1009 c
+295 1064 394 1092 520 1092 c
+576 1092 635 1085 697 1070 c
+759 1055 823 1034 889 1006 c
+889 774 l
+783 774 l
+780 843 756 896 711 935 c
+666 974 604 993 526 993 c
+449 993 390 979 350 952 c
+311 925 291 884 291 829 c
+291 784 306 748 336 721 c
+366 694 426 667 516 639 c
+662 594 l
+763 563 835 523 879 476 c
+924 429 946 369 946 295 c
+946 194 907 115 830 57 c
+753 -0 647 -29 512 -29 c
+443 -29 376 -22 311 -7 c
+246 8 180 30 115 59 c
+
+ce} _d
+/u{1319 0 55 -29 1243 1063 sc
+725 1063 m
+1069 1063 l
+1069 106 l
+1243 106 l
+1243 0 l
+885 0 l
+885 188 l
+851 117 807 63 753 26 c
+699 -11 636 -29 565 -29 c
+447 -29 360 4 304 71 c
+249 138 221 244 221 387 c
+221 956 l
+55 956 l
+55 1063 l
+406 1063 l
+406 444 l
+406 315 422 226 453 178 c
+485 130 542 106 623 106 c
+708 106 773 137 818 200 c
+863 263 885 354 885 473 c
+885 956 l
+725 956 l
+725 1063 l
+
+ce} _d
+/y{1157 0 -6 -455 1151 1063 sc
+442 -195 m
+512 -18 l
+115 956 l
+-6 956 l
+-6 1063 l
+483 1063 l
+483 956 l
+313 956 l
+612 225 l
+911 956 l
+752 956 l
+752 1063 l
+1151 1063 l
+1151 956 l
+1032 956 l
+545 -240 l
+512 -323 475 -379 434 -409 c
+393 -440 336 -455 262 -455 c
+231 -455 198 -452 165 -447 c
+132 -442 99 -434 66 -424 c
+66 -221 l
+160 -221 l
+164 -266 175 -299 194 -318 c
+213 -338 243 -348 283 -348 c
+320 -348 349 -338 371 -317 c
+394 -297 417 -256 442 -195 c
+
+ce} _d
+end readonly def
+
+/BuildGlyph {
+ exch begin
+ CharStrings exch
+ 2 copy known not {pop /.notdef} if
+ true 3 1 roll get exec
+ end
+} _d
+
+/BuildChar {
+ 1 index /Encoding get exch get
+ 1 index /BuildGlyph get exec
+} _d
+
+FontName currentdict end definefont pop
+end
+%%EndProlog
+mpldict begin
+75.6 223.2 translate
+460.8 345.6 0 0 clipbox
+gsave
+0 0 m
+460.8 0 l
+460.8 345.6 l
+0 345.6 l
+cl
+1.000 setgray
+fill
+grestore
+gsave
+108.312798 56.796 m
+433.392671 56.796 l
+433.392671 330.048 l
+108.312798 330.048 l
+cl
+1.000 setgray
+fill
+grestore
+0.800 setlinewidth
+1 setlinejoin
+1 setlinecap
+[] 0 setdash
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+108.312798 56.796 m
+108.312798 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+96.3128 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/one glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+173.328772 56.796 m
+173.328772 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+161.329 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/two glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+238.344747 56.796 m
+238.344747 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+226.345 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/three glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+303.360722 56.796 m
+303.360722 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+291.361 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/four glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+368.376696 56.796 m
+368.376696 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+356.377 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.635938 moveto
+/one glyphshow
+8.43005 0.635938 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.36094 moveto
+/five glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+433.392671 56.796 m
+433.392671 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+421.393 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/six glyphshow
+grestore
+/DejaVuSerif 14.400 selectfont
+gsave
+
+198.384 20.8554 translate
+0 rotate
+0 0 m /N glyphshow
+12.5781 0 m /u glyphshow
+21.8362 0 m /m glyphshow
+35.4672 0 m /b glyphshow
+44.6692 0 m /e glyphshow
+53.1763 0 m /r glyphshow
+60.0479 0 m /space glyphshow
+64.6173 0 m /o glyphshow
+73.2718 0 m /f glyphshow
+78.5922 0 m /space glyphshow
+83.1616 0 m /S glyphshow
+93.0093 0 m /a glyphshow
+101.58 0 m /m glyphshow
+115.211 0 m /p glyphshow
+124.413 0 m /l glyphshow
+129.01 0 m /e glyphshow
+137.517 0 m /s glyphshow
+grestore
+gsave
+33.7128 66.0988 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.2668 0.515625 moveto
+/two glyphshow
+23.2783 0.515625 moveto
+/multiply glyphshow
+36.9618 0.515625 moveto
+/one glyphshow
+45.3918 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.94 5.24062 moveto
+/minus glyphshow
+61.7115 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 113.977 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/four glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 158.021 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/six glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+33.7128 198.799 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.1418 0.515625 moveto
+/eight glyphshow
+23.1533 0.515625 moveto
+/multiply glyphshow
+36.8368 0.515625 moveto
+/one glyphshow
+45.2668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.815 5.24062 moveto
+/minus glyphshow
+61.5865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+45.7128 236.763 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+11.0115 0.515625 moveto
+/multiply glyphshow
+24.6949 0.515625 moveto
+/one glyphshow
+33.125 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+41.6732 5.24062 moveto
+/minus glyphshow
+49.4446 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+33.7128 272.275 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.2668 0.515625 moveto
+/two glyphshow
+23.2783 0.515625 moveto
+/multiply glyphshow
+36.9618 0.515625 moveto
+/one glyphshow
+45.3918 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.94 5.24062 moveto
+/minus glyphshow
+61.7115 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 305.634 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/four glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+/DejaVuSerif 14.400 selectfont
+gsave
+
+25.6034 141.984 translate
+90 rotate
+0 0 m /T glyphshow
+9.58801 0 m /r glyphshow
+16.4597 0 m /a glyphshow
+25.0299 0 m /i glyphshow
+29.6274 0 m /n glyphshow
+38.8855 0 m /i glyphshow
+43.483 0 m /n glyphshow
+52.7411 0 m /g glyphshow
+61.9431 0 m /space glyphshow
+66.5125 0 m /T glyphshow
+76.1005 0 m /i glyphshow
+80.6979 0 m /m glyphshow
+94.3289 0 m /e glyphshow
+grestore
+1.200 setlinewidth
+0.122 0.467 0.706 setrgbcolor
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 310.291969 m
+stroke
+grestore
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 302.547354 m
+283.788963 317.627455 l
+stroke
+grestore
+0 setlinecap
+[4.8 1.8] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 142.738982 m
+stroke
+grestore
+1 setlinecap
+[] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 134.430515 m
+283.788963 151.743921 l
+stroke
+grestore
+0 setlinecap
+[1.2 1.2] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 87.537625 m
+stroke
+grestore
+1 setlinecap
+[] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 69.216545 m
+283.788963 114.754776 l
+stroke
+grestore
+1.000 setlinewidth
+0 setlinejoin
+2 setlinecap
+0.800 setgray
+gsave
+108.312798 56.796 m
+108.312798 330.048 l
+stroke
+grestore
+gsave
+433.392671 56.796 m
+433.392671 330.048 l
+stroke
+grestore
+gsave
+108.312798 56.796 m
+433.392671 56.796 l
+stroke
+grestore
+gsave
+108.312798 330.048 m
+433.392671 330.048 l
+stroke
+grestore
+0.800 setlinewidth
+0 setlinecap
+gsave
+342.834546 241.61925 m
+424.152671 241.61925 l
+425.912671 241.61925 426.792671 242.49925 426.792671 244.25925 c
+426.792671 320.808 l
+426.792671 322.568 425.912671 323.448 424.152671 323.448 c
+342.834546 323.448 l
+341.074546 323.448 340.194546 322.568 340.194546 320.808 c
+340.194546 244.25925 l
+340.194546 242.49925 341.074546 241.61925 342.834546 241.61925 c
+cl
+gsave
+1.000 setgray
+fill
+grestore
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 14.400 selectfont
+gsave
+
+359.431 307.527 translate
+0 rotate
+0 0 m /K glyphshow
+10.3641 0 m /e glyphshow
+18.8712 0 m /r glyphshow
+25.7429 0 m /n glyphshow
+35.001 0 m /e glyphshow
+43.5081 0 m /l glyphshow
+grestore
+1.200 setlinewidth
+1 setlinejoin
+1 setlinecap
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 292.766125 m
+358.674546 292.766125 l
+371.874546 292.766125 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 288.146 translate
+0 rotate
+0 0 m /r glyphshow
+6.33386 0 m /b glyphshow
+14.8157 0 m /f glyphshow
+grestore
+0 setlinecap
+[4.8 1.8] 0 setdash
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 273.588 m
+358.674546 273.588 l
+371.874546 273.588 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 268.968 translate
+0 rotate
+0 0 m /p glyphshow
+8.48181 0 m /o glyphshow
+16.459 0 m /l glyphshow
+20.6967 0 m /y glyphshow
+grestore
+[1.2 1.2] 0 setdash
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 254.222375 m
+358.674546 254.222375 l
+371.874546 254.222375 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 249.602 translate
+0 rotate
+0 0 m /l glyphshow
+4.23767 0 m /i glyphshow
+8.47534 0 m /n glyphshow
+17.0089 0 m /e glyphshow
+24.8502 0 m /a glyphshow
+32.7498 0 m /r glyphshow
+grestore
+
+end
+showpage
diff --git a/examples/security/kdd-nsl/retrain.py b/examples/security/kdd-nsl/retrain.py
index a7dbac4f..83b398aa 100644
--- a/examples/security/kdd-nsl/retrain.py
+++ b/examples/security/kdd-nsl/retrain.py
@@ -237,7 +237,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
results = pd.read_csv("output/train.csv")
# Some convenient variable names
# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"]
-results["Kernel"] = results["model.init.kwargs.kernel"].copy()
+results["Kernel"] = results["model.init.kernel"].copy()
# results["Features"] = results["data.generate.kwargs.n_features"].copy()
# results["Samples"] = results["data.sample.train_size"].copy()
# results["input_size"] = input_size
@@ -310,8 +310,11 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
"r",
) as f:
probs = json.load(f)
- probs = np.array(probs)
- false_confidence = y_test[: len(probs)] - probs[:, 1]
+ probs = np.squeeze(np.array(probs))
+ # take only the second column
+ if len(probs.shape) > 1:
+ probs = probs[:, 1]
+ false_confidence = y_test[: len(probs)] - probs[:]
avg_prob = np.mean(false_confidence)
with open(
Path("output/reports/attack", folder, "score_dict.json"),
@@ -341,7 +344,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
params = json.load(f)
else:
raise ValueError(f"No params file found for {folder}")
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence"] = avg_prob
@@ -392,7 +395,12 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
) as f:
probs = json.load(f)
probs = np.array(probs)
- false_confidence = y_test[: len(probs)] - probs[:, 1]
+ if len(probs.shape) > 1:
+ probs = np.squeeze(probs)
+ probs = probs[:, 1]
+ else:
+ probs = np.squeeze(probs)
+ false_confidence = y_test[: len(probs)] - probs
avg_prob = np.mean(false_confidence)
pd.DataFrame(probs).to_csv(
Path(
@@ -429,7 +437,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
else:
logger.warning(f"No params file found for {folder}")
continue
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence After Retraining"] = avg_prob
diff --git a/examples/security/truthseeker/.gitignore b/examples/security/truthseeker/.gitignore
index b12c2563..ff637185 100644
--- a/examples/security/truthseeker/.gitignore
+++ b/examples/security/truthseeker/.gitignore
@@ -2,3 +2,4 @@ logs/
multirun/
output/
models/
+/retrain
diff --git a/examples/security/truthseeker/attacks.sh b/examples/security/truthseeker/attacks.sh
index 76ed02bc..ccbb0574 100644
--- a/examples/security/truthseeker/attacks.sh
+++ b/examples/security/truthseeker/attacks.sh
@@ -11,7 +11,7 @@ for model_config in $CONFIG_NAMES; do
continue
fi
HYDRA_FULL_ERROR=1 python -m deckard.layers.optimise \
- ++model.init.kernel=kernel_name \
+ ++model.init.kernel=${kernel_name} \
++stage=attack \
++attack.init.name=art.attacks.evasion.ProjectedGradientDescent \
++attack.init.norm=1,2,inf \
diff --git a/examples/security/truthseeker/dvc.lock b/examples/security/truthseeker/dvc.lock
index f3ba1d0a..0945b506 100644
--- a/examples/security/truthseeker/dvc.lock
+++ b/examples/security/truthseeker/dvc.lock
@@ -94,39 +94,39 @@ stages:
outs:
- path: output/reports/train/default/params.yaml
hash: md5
- md5: 7234aab7d5edae504afa2090d96e4c3f
- size: 2434
+ md5: 6225c0aefe4059bfae7f5b0e04ae549a
+ size: 2189
- path: output/reports/train/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/score_dict.json
hash: md5
- md5: 1b659aed969c2f3dbd29681d381ce1d0
- size: 360
+ md5: 82b8ad9524a1b60f5cbdf4937870888b
+ size: 717
attack:
cmd: python -m deckard.layers.experiment attack
deps:
- path: output/reports/train/default/params.yaml
hash: md5
- md5: 7234aab7d5edae504afa2090d96e4c3f
- size: 2434
+ md5: 6225c0aefe4059bfae7f5b0e04ae549a
+ size: 2189
- path: output/reports/train/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/train/default/score_dict.json
hash: md5
- md5: 1b659aed969c2f3dbd29681d381ce1d0
- size: 360
+ md5: 82b8ad9524a1b60f5cbdf4937870888b
+ size: 717
params:
params.yaml:
attack:
@@ -315,32 +315,32 @@ stages:
outs:
- path: output/attacks/attack.pkl
hash: md5
- md5: 2b7587aefdfa486e84fb3c4ccb5f640c
+ md5: 444495650bb1e76bae90cbb99153f824
size: 1832
- path: output/reports/attack/default/adv_predictions.json
hash: md5
- md5: 18482a5b7773de281dc9e127a6febf98
- size: 438
+ md5: 9878cc54791c7354cb668af97e66079a
+ size: 700
- path: output/reports/attack/default/adv_probabilities.json
hash: md5
- md5: 18482a5b7773de281dc9e127a6febf98
- size: 438
+ md5: 9878cc54791c7354cb668af97e66079a
+ size: 700
- path: output/reports/attack/default/params.yaml
hash: md5
- md5: b300c684dc58fc23684ccefbb9f83265
- size: 5832
+ md5: 3aa13a2e1e66b911f66d9bd8a8823369
+ size: 5310
- path: output/reports/attack/default/predictions.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/attack/default/probabilities.json
hash: md5
- md5: 7e3dec7b2d06af151bf81addc33fba5a
- size: 44061
+ md5: 3c5089245ae71f1b860304a02a224078
+ size: 70072
- path: output/reports/attack/default/score_dict.json
hash: md5
- md5: fe6164548c98534ee88f439f91a5151a
- size: 585
+ md5: 04f78e33b2894f630875ad3c6412a5ff
+ size: 1238
models:
cmd: bash other_data.sh +stage=train --config-name=model.yaml
deps:
@@ -448,53 +448,54 @@ stages:
outs:
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
compile_models:
cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file
output/train.csv
deps:
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
- path: output/reports/train/
hash: md5
- md5: 0f4c497909d988c75851e5e56a440b89.dir
- size: 42005082
- nfiles: 1637
+ md5: c4c5ab1d22c12d150cf53a3b630e8442.dir
+ size: 10780144
+ nfiles: 312
outs:
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
find_best_model@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_rbf --study_name=rbf --default_config model.yaml
+ --params_file best_rbf --study_name=rbf --default_config default.yaml --storage_name
+ sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/model/best_rbf.yaml
hash: md5
@@ -502,21 +503,22 @@ stages:
size: 359
find_best_model@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_linear --study_name=linear --default_config model.yaml
+ --params_file best_linear --study_name=linear --default_config default.yaml
+ --storage_name sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/model/best_linear.yaml
hash: md5
@@ -524,26 +526,27 @@ stages:
size: 330
find_best_model@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model
- --params_file best_poly --study_name=poly --default_config model.yaml
+ --params_file best_poly --study_name=poly --default_config default.yaml --storage_name
+ sqlite:///model.db
deps:
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 307b98679bd448826190d15d2c48db7b
+ size: 369
attacks:
cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml
deps:
@@ -553,34 +556,34 @@ stages:
size: 330
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 307b98679bd448826190d15d2c48db7b
+ size: 369
- path: conf/model/best_rbf.yaml
hash: md5
md5: 4932ceac75d6256ce2a7864aa4a5ea3c
size: 359
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: model.db
hash: md5
- md5: 0b595e029e8e9d6e99c3da6511906eb7
- size: 778240
+ md5: f283988890339a1e01b295d97ca2f929
+ size: 155648
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: attack.db
hash: md5
- md5: 32b63718640047c18ed7bb1aff484595
- size: 389120
+ md5: 7c78ffc40aedba8c75061fdf40fdf315
+ size: 208896
- path: logs/attacks/
hash: md5
- md5: 61801da5096fd94a88d69f6de5be2413.dir
- size: 3180296
+ md5: f9bd73b81f44394d16d6bc194c85fb14.dir
+ size: 420089
nfiles: 3
compile_attacks:
cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file
@@ -588,89 +591,92 @@ stages:
deps:
- path: attack.db
hash: md5
- md5: 32b63718640047c18ed7bb1aff484595
- size: 389120
+ md5: 7c78ffc40aedba8c75061fdf40fdf315
+ size: 208896
- path: logs/attacks/
hash: md5
- md5: 61801da5096fd94a88d69f6de5be2413.dir
- size: 3180296
+ md5: f9bd73b81f44394d16d6bc194c85fb14.dir
+ size: 420089
nfiles: 3
- path: output/reports/attack/
hash: md5
- md5: 84a4553074e952b76f6a4f228dddbb47.dir
- size: 29299858
- nfiles: 1968
+ md5: 11465f27296c17a8863dcc4bcea9eb22.dir
+ size: 20702813
+ nfiles: 1093
outs:
- path: output/attack.csv
hash: md5
- md5: 188c5eda3a172c9a30808781f429aed4
- size: 703053
+ md5: 490f9a3401c509d62c0b293ffa634a65
+ size: 503235
find_best_attack@linear:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_linear --study_name=best_linear --default_config attack.yaml
+ --params_file best_linear --study_name=best_linear --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 32b63718640047c18ed7bb1aff484595
- size: 389120
+ md5: 7c78ffc40aedba8c75061fdf40fdf315
+ size: 208896
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: df65ae18996a57abebd38df98db37edb
- size: 245
+ md5: 3b770eef3005669fb6c893dc239337c1
+ size: 248
find_best_attack@rbf:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_rbf --study_name=best_rbf --default_config attack.yaml
+ --params_file best_rbf --study_name=best_rbf --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 32b63718640047c18ed7bb1aff484595
- size: 389120
+ md5: 7c78ffc40aedba8c75061fdf40fdf315
+ size: 208896
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: 9871a9d8d50ef211c7f0ae884bb39fe4
- size: 247
+ md5: 78076d6ff4a3f2f5ec4e550db50b759f
+ size: 245
find_best_attack@poly:
cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack
- --params_file best_poly --study_name=best_poly --default_config attack.yaml
+ --params_file best_poly --study_name=best_poly --default_config default.yaml
+ --storage_name sqlite:///attack.db --direction minimize
deps:
- path: attack.db
hash: md5
- md5: 32b63718640047c18ed7bb1aff484595
- size: 389120
+ md5: 7c78ffc40aedba8c75061fdf40fdf315
+ size: 208896
- path: logs/models/
hash: md5
- md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir
- size: 357091
+ md5: 8e67f43a680648ecc549525d90f55662.dir
+ size: 202043
nfiles: 3
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
outs:
- path: conf/attack/best_poly.yaml
hash: md5
- md5: d4c4945873617b0652018e6f27e52b89
- size: 247
+ md5: 5355e960ee2cab726da8da4f761746b5
+ size: 248
other_data_train@kdd_nsl:
cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml
deps:
@@ -706,93 +712,94 @@ stages:
deps:
- path: conf/attack/best_linear.yaml
hash: md5
- md5: df65ae18996a57abebd38df98db37edb
- size: 245
+ md5: 3b770eef3005669fb6c893dc239337c1
+ size: 248
- path: conf/attack/best_poly.yaml
hash: md5
- md5: d4c4945873617b0652018e6f27e52b89
- size: 247
+ md5: 5355e960ee2cab726da8da4f761746b5
+ size: 248
- path: conf/attack/best_rbf.yaml
hash: md5
- md5: 9871a9d8d50ef211c7f0ae884bb39fe4
- size: 247
+ md5: 78076d6ff4a3f2f5ec4e550db50b759f
+ size: 245
- path: conf/model/best_linear.yaml
hash: md5
md5: e4ae7059114d8724d4947e952145d4fe
size: 330
- path: conf/model/best_poly.yaml
hash: md5
- md5: 12f892f3ba4ef8bab095b36bd7558d3e
- size: 372
+ md5: 307b98679bd448826190d15d2c48db7b
+ size: 369
- path: conf/model/best_rbf.yaml
hash: md5
md5: 4932ceac75d6256ce2a7864aa4a5ea3c
size: 359
- path: output/attacks/
hash: md5
- md5: cde8aa6baa7c2646a1fc09ea3956b5e6.dir
- size: 327928
- nfiles: 179
- - path: output/models/
- hash: md5
- md5: 420131f3b75400bb25e03920f359494a.dir
- size: 2326552
- nfiles: 272
+ md5: b66feb7848ca1405dfb53b0aa2f6ca1e.dir
+ size: 2036072
+ nfiles: 121
outs:
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: 6818046e86115df423cf15e24a43536f
- size: 52143
+ md5: 73b389e63f70f94899b8c3d6d3c97bcd
+ size: 394238
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: d479df2e41303c4466ff8f9218d0fe66
- size: 52126
+ md5: 9ee0eafdd6ba1764ae7f31f5856fe164
+ size: 394221
- path: retrain/
hash: md5
- md5: 2360b46dfe437da0aff771c4522c37eb.dir
- size: 174505
+ md5: 19310315f07f04e7842f59c9df05db78.dir
+ size: 176116
nfiles: 12
plots:
cmd: python plots.py
deps:
- path: output/attack.csv
hash: md5
- md5: 188c5eda3a172c9a30808781f429aed4
- size: 703053
+ md5: 490f9a3401c509d62c0b293ffa634a65
+ size: 503235
- path: output/train.csv
hash: md5
- md5: 348d49dcbf81f9db4f7abb76fcc2f06e
- size: 598748
+ md5: 5290b41fa9349727642757688378dec0
+ size: 152670
+ - path: plots.py
+ hash: md5
+ md5: f1f73855e466a5f38128b4123f7bd186
+ size: 10155
- path: plots/after_retrain_confidence.csv
hash: md5
- md5: 6818046e86115df423cf15e24a43536f
- size: 52143
+ md5: 73b389e63f70f94899b8c3d6d3c97bcd
+ size: 394238
- path: plots/before_retrain_confidence.csv
hash: md5
- md5: d479df2e41303c4466ff8f9218d0fe66
- size: 52126
+ md5: 9ee0eafdd6ba1764ae7f31f5856fe164
+ size: 394221
outs:
- - path: plots/accuracy_vs_attack_parameters.pdf
+ - path: plots/accuracy_vs_attack_parameters.eps
hash: md5
- md5: 9a97f9f585f99c7794818b8fa38ac311
- size: 15792
- - path: plots/confidence_vs_attack_parameters.pdf
+ md5: aa706c0ecf286ccbebf168f078a29d75
+ size: 39185
+ - path: plots/confidence_vs_attack_parameters.eps
hash: md5
- md5: 65d58bfd40e40bea5e9114c84e353ea2
- size: 17506
- - path: plots/retrain_accuracy.pdf
+ md5: a77acb08b4c7bfa4ad937b6a085b9eed
+ size: 41336
+ - path: plots/retrain_accuracy.eps
hash: md5
- md5: 577e89d46eb6f2446d0a3ed83b4f9e19
- size: 13913
- - path: plots/retrain_confidence_vs_attack_parameters.pdf
+ md5: 106ffdb6d70899f23fc71927e5029133
+ size: 30830
+ - path: plots/retrain_confidence_vs_attack_parameters.eps
hash: md5
- md5: e1fa2d6ebd91b406426215c07d9df11a
- size: 18683
- - path: plots/retrain_time.pdf
+ md5: 002bd002f2e020dadcc8cc18bacbe13f
+ size: 41837
+ - path: plots/retrain_time.eps
hash: md5
- md5: d48a53f11dd9db3b30b9382e3404963d
- size: 12916
- - path: plots/train_time_vs_attack_parameters.pdf
+ md5: 9fcacfebf8617111de7d546b788ba83f
+ size: 28365
+ - path: plots/train_time_vs_attack_parameters.eps
hash: md5
- md5: f0a52d3088d3b90f7d6e157b87e6fc5a
- size: 17167
+ md5: 22fa5b3a2e2b5d8b532a59415484223b
+ size: 39894
+ move_files:
+ cmd: cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/truthseeker/ && rm ~/KDD-Paper-EAI-AISEC/truthseeker/.gitignore
diff --git a/examples/security/truthseeker/dvc.yaml b/examples/security/truthseeker/dvc.yaml
index 6b6c8962..0794289c 100644
--- a/examples/security/truthseeker/dvc.yaml
+++ b/examples/security/truthseeker/dvc.yaml
@@ -73,7 +73,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config model.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config default.yaml --storage_name sqlite:///model.db
outs:
- conf/model/best_${item}.yaml
deps:
@@ -111,7 +111,7 @@ stages:
- rbf
- poly
do:
- cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml
+ cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize
outs:
- conf/attack/best_${item}.yaml
deps:
@@ -121,7 +121,6 @@ stages:
retrain:
cmd : python retrain.py
deps:
- - ${files.directory}/models/
- ${files.directory}/attacks/
- conf/attack/best_linear.yaml
- conf/attack/best_rbf.yaml
@@ -141,14 +140,18 @@ stages:
- output/attack.csv
- plots/before_retrain_confidence.csv
- output/train.csv
+ - plots.py
plots :
- - plots/accuracy_vs_attack_parameters.pdf
- # - plots/accuracy_vs_features.pdf
- # - plots/accuracy_vs_samples.pdf
- - plots/confidence_vs_attack_parameters.pdf
- - plots/train_time_vs_attack_parameters.pdf
- # - plots/train_time_vs_features.pdf
- # - plots/train_time_vs_samples.pdf
- - plots/retrain_accuracy.pdf
- - plots/retrain_confidence_vs_attack_parameters.pdf
- - plots/retrain_time.pdf
+ - plots/accuracy_vs_attack_parameters.eps
+ # - plots/accuracy_vs_features.eps
+ # - plots/accuracy_vs_samples.eps
+ - plots/confidence_vs_attack_parameters.eps
+ - plots/train_time_vs_attack_parameters.eps
+ # - plots/train_time_vs_features.eps
+ # - plots/train_time_vs_samples.eps
+ - plots/retrain_accuracy.eps
+ - plots/retrain_confidence_vs_attack_parameters.eps
+ - plots/retrain_time.eps
+ move_files:
+ cmd: >-
+ cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/truthseeker/ && rm ~/KDD-Paper-EAI-AISEC/truthseeker/.gitignore
diff --git a/examples/security/truthseeker/plots.py b/examples/security/truthseeker/plots.py
index c5ae8ac3..b5499185 100644
--- a/examples/security/truthseeker/plots.py
+++ b/examples/security/truthseeker/plots.py
@@ -2,7 +2,6 @@
import seaborn as sns
from pathlib import Path
import matplotlib.pyplot as plt
-
import logging
sns.set_style("whitegrid")
@@ -19,28 +18,16 @@
# else:
# results = parse_results("reports/model_queue/")
results = pd.read_csv("output/train.csv")
-# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"]
-results["Kernel"] = results["model.init.kwargs.kernel"].copy()
-# results["Features"] = results["data.generate.kwargs.n_features"].copy()
-results["Samples"] = results["data.sample.train_size"].copy()
-# results["input_size"] = input_size
-# sample_list = results["data.generate.kwargs.n_samples"].unique()
-# feature_list = results["data.generate.kwargs.n_features"].unique()
-kernel_list = results["model.init.kwargs.kernel"].unique()
+results["Kernel"] = results["model.init.kernel"].copy()
if "Unnamed: 0" in results.columns:
del results["Unnamed: 0"]
for col in results.columns:
if col == "data.name" and isinstance(results[col][0], list):
results[col] = results[col].apply(lambda x: x[0])
-results = results[results["model.init.kwargs.kernel"] != "sigmoid"]
+results = results[results["model.init.kernel"] != "sigmoid"]
attack_results = pd.read_csv("output/attack.csv")
-attack_results["Kernel"] = attack_results["model.init.kwargs.kernel"].copy()
-# attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy()
-# attack_results["Samples"] = attack_results["data.sample.train_size"].copy()
-# sample_list = attack_results["data.generate.kwargs.n_samples"].unique()
-# feature_list = attack_results["data.generate.kwargs.n_features"].unique()
-kernel_list = attack_results["model.init.kwargs.kernel"].unique()
+attack_results["Kernel"] = attack_results["model.init.kernel"].copy()
if "Unnamed: 0" in attack_results.columns:
del attack_results["Unnamed: 0"]
for col in attack_results.columns:
@@ -48,75 +35,26 @@
attack_results[col] = attack_results[col].apply(lambda x: x[0])
-# graph1 = sns.lineplot(
-# x="data.sample.train_size",
-# y="accuracy",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph1.legend(labels=["Linear", "RBF", "Poly"])
-# graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
-# graph1.set_xlabel("Number of Samples")
-# graph1.set_ylabel("Accuracy")
-# graph1.set_xscale("log")
-# graph1.get_figure().tight_layout()
-# graph1.get_figure().savefig("plots/accuracy_vs_samples.pdf")
-# plt.gcf().clear()
-
-# graph2 = sns.lineplot(
-# x="data.generate.kwargs.n_features",
-# y="accuracy",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph2.set_xlabel("Number of Features")
-# graph2.set_ylabel("Accuracy")
-# graph2.set_xscale("log")
-# graph2.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
-# graph2.get_figure().tight_layout()
-# graph2.get_figure().savefig("plots/accuracy_vs_features.pdf")
-# plt.gcf().clear()
-
-# results["train_time"] = (
-# results["train_time"]
-# * results["data.sample.train_size"]
-# * results["data.generate.kwargs.n_samples"]
-# )
-# graph3 = sns.lineplot(
-# x="data.generate.kwargs.n_features",
-# y="train_time",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph3.set_xlabel("Number of Features")
-# graph3.set_ylabel("Training Time")
-# graph3.set(yscale="log", xscale="log")
-# graph3.legend(title="Kernel")
-# graph3.get_figure().tight_layout()
-# graph3.get_figure().savefig("plots/train_time_vs_features.pdf")
-# plt.gcf().clear()
-
-# graph4 = sns.lineplot(
-# x="data.sample.train_size",
-# y="train_time",
-# data=results,
-# style="Kernel",
-# style_order=["rbf", "poly", "linear"],
-# )
-# graph4.set_xlabel("Number of Samples")
-# graph4.set_ylabel("Training Time")
-# graph4.set(yscale="log", xscale="log")
-# graph4.legend(title="Kernel")
-# graph4.get_figure().tight_layout()
-# graph4.get_figure().savefig("plots/train_time_vs_samples.eps")
-# plt.gcf().clear()
+graph4 = sns.lineplot(
+ x="data.sample.train_size",
+ y="train_time",
+ data=results,
+ style="Kernel",
+ style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
+)
+graph4.set_xlabel("Number of Samples")
+graph4.set_ylabel("Training Time")
+graph4.set(yscale="log", xscale="log", xlim=(10, 1e6))
+graph4.legend(title="Kernel")
+graph4.get_figure().tight_layout()
+graph4.get_figure().savefig("plots/train_time_vs_samples.eps")
+plt.gcf().clear()
fig, ax = plt.subplots(2, 2)
graph5 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -124,20 +62,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy")
graph6 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="accuracy",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy")
graph7 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -145,10 +87,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy")
graph8 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="accuracy",
data=attack_results,
style="Kernel",
@@ -156,16 +100,18 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy")
graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout()
-fig.savefig("plots/accuracy_vs_attack_parameters.pdf")
+fig.savefig("plots/accuracy_vs_attack_parameters.eps")
plt.gcf().clear()
fig, ax = plt.subplots(2, 2)
graph9 = sns.lineplot(
- x="attack.init.kwargs.eps",
+ x="attack.init.eps",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -173,20 +119,24 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time")
graph10 = sns.lineplot(
- x="attack.init.kwargs.eps_step",
+ x="attack.init.eps_step",
y="adv_fit_time",
data=attack_results,
style="Kernel",
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time")
graph11 = sns.lineplot(
- x="attack.init.kwargs.max_iter",
+ x="attack.init.max_iter",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -194,10 +144,12 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time")
graph12 = sns.lineplot(
- x="attack.init.kwargs.batch_size",
+ x="attack.init.batch_size",
y="adv_fit_time",
data=attack_results,
style="Kernel",
@@ -205,11 +157,13 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/train_time_vs_attack_parameters.pdf")
+fig.savefig("plots/train_time_vs_attack_parameters.eps")
plt.gcf().clear()
retrain_df = pd.DataFrame()
@@ -232,6 +186,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -241,12 +197,14 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
retrain.set_ylabel("Accuracy")
retrain.get_figure().tight_layout()
-retrain.get_figure().savefig("plots/retrain_accuracy.pdf")
+retrain.get_figure().savefig("plots/retrain_accuracy.eps")
plt.gcf().clear()
retrain_df["ben_time"] = retrain_df["ben_time"] * retrain_df["train_size"] * 10
@@ -257,6 +215,8 @@
data=retrain_df,
style="Kernel",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain = sns.lineplot(
x="Epochs",
@@ -266,13 +226,15 @@
color="darkred",
legend=False,
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
retrain.set_xlabel("Retraining Epochs")
retrain.set_ylabel("Time")
retrain.set_yscale("log")
retrain.get_figure().tight_layout()
-retrain.get_figure().savefig("plots/retrain_time.pdf")
+retrain.get_figure().savefig("plots/retrain_time.eps")
plt.gcf().clear()
confidence_df = pd.read_csv("plots/before_retrain_confidence.csv")
@@ -286,6 +248,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -296,6 +260,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -307,6 +273,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -318,11 +286,13 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/confidence_vs_attack_parameters.pdf")
+fig.savefig("plots/confidence_vs_attack_parameters.eps")
plt.gcf().clear()
confdence_df = pd.read_csv("plots/after_retrain_confidence.csv")
@@ -337,6 +307,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence")
graph10 = sns.lineplot(
@@ -347,6 +319,8 @@
ax=ax[0, 1],
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence")
graph11 = sns.lineplot(
@@ -358,6 +332,8 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence")
graph12 = sns.lineplot(
@@ -369,9 +345,11 @@
legend=False,
color="darkred",
style_order=["rbf", "poly", "linear"],
+ err_style="bars",
+ errorbar=("ci", 99),
)
graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence")
graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel")
fig.tight_layout(h_pad=0.5)
-fig.savefig("plots/retrain_confidence_vs_attack_parameters.pdf")
+fig.savefig("plots/retrain_confidence_vs_attack_parameters.eps")
plt.gcf().clear()
diff --git a/examples/security/truthseeker/plots/.gitignore b/examples/security/truthseeker/plots/.gitignore
index dd345776..f09089fa 100644
--- a/examples/security/truthseeker/plots/.gitignore
+++ b/examples/security/truthseeker/plots/.gitignore
@@ -1,6 +1,6 @@
-/accuracy_vs_attack_parameters.pdf
-/confidence_vs_attack_parameters.pdf
-/train_time_vs_attack_parameters.pdf
-/retrain_accuracy.pdf
-/retrain_confidence_vs_attack_parameters.pdf
-/retrain_time.pdf
+/accuracy_vs_attack_parameters.eps
+/confidence_vs_attack_parameters.eps
+/train_time_vs_attack_parameters.eps
+/retrain_accuracy.eps
+/retrain_confidence_vs_attack_parameters.eps
+/retrain_time.eps
diff --git a/examples/security/truthseeker/plots/train_time_vs_samples.eps b/examples/security/truthseeker/plots/train_time_vs_samples.eps
new file mode 100644
index 00000000..0d282c40
--- /dev/null
+++ b/examples/security/truthseeker/plots/train_time_vs_samples.eps
@@ -0,0 +1,1373 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Title: train_time_vs_samples.eps
+%%Creator: Matplotlib v3.7.2, https://matplotlib.org/
+%%CreationDate: Tue Jul 16 15:31:57 2024
+%%Orientation: portrait
+%%BoundingBox: 75 223 537 569
+%%HiResBoundingBox: 75.600000 223.200000 536.400000 568.800000
+%%EndComments
+%%BeginProlog
+/mpldict 11 dict def
+mpldict begin
+/_d { bind def } bind def
+/m { moveto } _d
+/l { lineto } _d
+/r { rlineto } _d
+/c { curveto } _d
+/cl { closepath } _d
+/ce { closepath eofill } _d
+/box {
+ m
+ 1 index 0 r
+ 0 exch r
+ neg 0 r
+ cl
+ } _d
+/clipbox {
+ box
+ clip
+ newpath
+ } _d
+/sc { setcachedevice } _d
+%!PS-Adobe-3.0 Resource-Font
+%%Creator: Converted from TrueType to Type 3 by Matplotlib.
+10 dict begin
+/FontName /DejaVuSerif def
+/PaintType 0 def
+/FontMatrix [0.00048828125 0 0 0.00048828125 0 0] def
+/FontBBox [-1576 -710 4312 2272] def
+/FontType 3 def
+/Encoding [/minus /space /period /zero /one /two /three /four /five /six /eight /K /N /S /T /multiply /a /b /e /f /g /i /l /m /n /o /p /r /s /u /y] def
+/CharStrings 32 dict dup begin
+/.notdef 0 def
+/minus{1716 0 217 561 1499 723 sc
+217 723 m
+1499 723 l
+1499 561 l
+217 561 l
+217 723 l
+
+ce} _d
+/space{651 0 0 0 0 0 sc
+ce} _d
+/period{651 0 193 -29 459 238 sc
+193 104 m
+193 141 206 173 231 199 c
+256 225 288 238 326 238 c
+363 238 394 225 420 199 c
+446 173 459 141 459 104 c
+459 67 446 36 420 10 c
+394 -16 363 -29 326 -29 c
+288 -29 256 -16 231 9 c
+206 35 193 67 193 104 c
+
+ce} _d
+/zero{1303 0 135 -29 1167 1520 sc
+651 70 m
+753 70 829 126 880 238 c
+931 350 956 519 956 745 c
+956 972 931 1141 880 1253 c
+829 1365 753 1421 651 1421 c
+549 1421 473 1365 422 1253 c
+371 1141 346 972 346 745 c
+346 519 371 350 422 238 c
+473 126 549 70 651 70 c
+
+651 -29 m
+489 -29 362 39 271 175 c
+180 311 135 501 135 745 c
+135 990 180 1180 271 1316 c
+362 1452 489 1520 651 1520 c
+814 1520 940 1452 1031 1316 c
+1122 1180 1167 990 1167 745 c
+1167 501 1122 311 1031 175 c
+940 39 814 -29 651 -29 c
+
+ce} _d
+/one{1303 0 250 0 1012 1520 sc
+291 0 m
+291 106 l
+551 106 l
+551 1348 l
+250 1153 l
+250 1284 l
+614 1520 l
+752 1520 l
+752 106 l
+1012 106 l
+1012 0 l
+291 0 l
+
+ce} _d
+/two{1303 0 139 0 1102 1520 sc
+262 1137 m
+150 1137 l
+150 1403 l
+221 1441 293 1470 365 1490 c
+438 1510 509 1520 578 1520 c
+733 1520 856 1482 946 1407 c
+1036 1332 1081 1229 1081 1100 c
+1081 954 979 779 775 576 c
+759 561 747 549 739 541 c
+362 164 l
+985 164 l
+985 348 l
+1102 348 l
+1102 0 l
+139 0 l
+139 109 l
+592 561 l
+692 661 763 753 806 836 c
+849 920 870 1008 870 1100 c
+870 1201 844 1279 791 1336 c
+739 1393 667 1421 575 1421 c
+480 1421 406 1397 354 1350 c
+302 1303 271 1232 262 1137 c
+
+ce} _d
+/three{1303 0 156 -29 1151 1520 sc
+199 1430 m
+277 1459 352 1482 423 1497 c
+495 1512 562 1520 625 1520 c
+771 1520 885 1488 967 1425 c
+1049 1362 1090 1275 1090 1163 c
+1090 1073 1062 998 1005 937 c
+948 877 868 836 764 815 c
+887 798 982 753 1049 681 c
+1117 610 1151 517 1151 403 c
+1151 264 1104 157 1010 82 c
+917 8 782 -29 606 -29 c
+528 -29 452 -21 377 -4 c
+303 13 229 38 156 72 c
+156 362 l
+268 362 l
+275 266 307 193 365 144 c
+423 95 505 70 610 70 c
+712 70 792 99 851 158 c
+910 217 940 298 940 401 c
+940 518 910 607 849 667 c
+788 728 699 758 582 758 c
+487 758 l
+487 860 l
+537 860 l
+654 860 741 884 799 932 c
+858 981 887 1054 887 1151 c
+887 1238 863 1305 815 1351 c
+767 1398 698 1421 608 1421 c
+518 1421 448 1400 398 1357 c
+349 1314 320 1251 311 1167 c
+199 1167 l
+199 1430 l
+
+ce} _d
+/four{1303 0 63 0 1200 1520 sc
+715 506 m
+715 1300 l
+205 506 l
+715 506 l
+
+1155 0 m
+475 0 l
+475 106 l
+715 106 l
+715 399 l
+63 399 l
+63 508 l
+717 1520 l
+915 1520 l
+915 506 l
+1200 506 l
+1200 399 l
+915 399 l
+915 106 l
+1155 106 l
+1155 0 l
+
+ce} _d
+/five{1303 0 174 -29 1145 1493 sc
+1030 1493 m
+1030 1329 l
+346 1329 l
+346 901 l
+381 925 421 943 467 955 c
+514 967 566 973 623 973 c
+784 973 912 928 1005 839 c
+1098 750 1145 628 1145 473 c
+1145 315 1098 192 1003 103 c
+909 15 777 -29 606 -29 c
+537 -29 467 -21 395 -4 c
+323 13 249 38 174 72 c
+174 362 l
+287 362 l
+293 267 323 195 377 145 c
+432 95 508 70 606 70 c
+711 70 792 105 849 174 c
+906 243 934 343 934 473 c
+934 602 906 701 849 770 c
+793 839 712 874 606 874 c
+546 874 493 863 447 842 c
+402 821 361 788 326 743 c
+240 743 l
+240 1493 l
+1030 1493 l
+
+ce} _d
+/six{1303 0 137 -29 1174 1520 sc
+670 70 m
+764 70 836 104 887 173 c
+938 242 963 342 963 471 c
+963 600 938 699 887 768 c
+836 837 764 872 670 872 c
+575 872 502 839 452 772 c
+402 705 377 609 377 483 c
+377 350 402 248 453 177 c
+504 106 576 70 670 70 c
+
+344 822 m
+389 872 441 909 498 934 c
+555 959 620 971 692 971 c
+841 971 958 926 1044 837 c
+1131 748 1174 626 1174 471 c
+1174 320 1127 198 1034 107 c
+941 16 817 -29 662 -29 c
+493 -29 364 34 273 159 c
+182 285 137 465 137 698 c
+137 959 191 1162 298 1305 c
+405 1448 557 1520 752 1520 c
+805 1520 860 1515 918 1505 c
+976 1495 1035 1480 1096 1460 c
+1096 1214 l
+983 1214 l
+975 1281 949 1333 906 1368 c
+863 1403 804 1421 731 1421 c
+602 1421 505 1372 442 1274 c
+379 1176 346 1025 344 822 c
+
+ce} _d
+/eight{1303 0 137 -29 1165 1520 sc
+954 408 m
+954 515 927 597 874 656 c
+821 715 747 745 651 745 c
+555 745 480 715 427 656 c
+374 597 348 515 348 408 c
+348 301 374 217 427 158 c
+480 99 555 70 651 70 c
+747 70 821 99 874 158 c
+927 217 954 301 954 408 c
+
+913 1133 m
+913 1224 890 1294 844 1345 c
+798 1396 734 1421 651 1421 c
+569 1421 505 1396 458 1345 c
+412 1294 389 1224 389 1133 c
+389 1042 412 971 458 920 c
+505 869 569 844 651 844 c
+734 844 798 869 844 920 c
+890 971 913 1042 913 1133 c
+
+805 795 m
+918 780 1007 738 1070 669 c
+1133 601 1165 514 1165 408 c
+1165 268 1121 160 1032 84 c
+943 9 816 -29 651 -29 c
+486 -29 359 9 270 84 c
+181 160 137 268 137 408 c
+137 514 169 601 232 669 c
+295 738 384 780 498 795 c
+397 813 320 851 266 909 c
+213 968 186 1042 186 1133 c
+186 1253 227 1347 310 1416 c
+393 1485 506 1520 651 1520 c
+796 1520 909 1485 992 1416 c
+1075 1347 1116 1253 1116 1133 c
+1116 1042 1089 968 1035 909 c
+982 851 905 813 805 795 c
+
+ce} _d
+/K{1530 0 113 0 1561 1493 sc
+113 0 m
+113 106 l
+303 106 l
+303 1386 l
+113 1386 l
+113 1493 l
+696 1493 l
+696 1386 l
+506 1386 l
+506 821 l
+1149 1386 l
+987 1386 l
+987 1493 l
+1483 1493 l
+1483 1386 l
+1315 1386 l
+674 823 l
+1391 106 l
+1561 106 l
+1561 0 l
+1214 0 l
+506 709 l
+506 106 l
+696 106 l
+696 0 l
+113 0 l
+
+ce} _d
+/N{1792 0 100 -29 1702 1493 sc
+100 0 m
+100 106 l
+301 106 l
+301 1386 l
+100 1386 l
+100 1493 l
+483 1493 l
+1378 315 l
+1378 1386 l
+1178 1386 l
+1178 1493 l
+1702 1493 l
+1702 1386 l
+1501 1386 l
+1501 -29 l
+1380 -29 l
+424 1229 l
+424 106 l
+625 106 l
+625 0 l
+100 0 l
+
+ce} _d
+/S{1403 0 172 -29 1253 1520 sc
+190 72 m
+190 412 l
+305 411 l
+308 298 341 214 403 159 c
+466 105 561 78 688 78 c
+807 78 897 101 959 148 c
+1022 195 1053 264 1053 354 c
+1053 426 1034 481 996 520 c
+959 559 879 596 758 633 c
+561 692 l
+418 735 318 789 259 854 c
+201 919 172 1007 172 1120 c
+172 1247 217 1345 307 1415 c
+397 1485 523 1520 686 1520 c
+755 1520 831 1512 914 1497 c
+997 1482 1085 1461 1178 1432 c
+1178 1114 l
+1065 1114 l
+1054 1219 1018 1295 959 1342 c
+900 1389 811 1413 690 1413 c
+585 1413 504 1391 449 1348 c
+394 1305 367 1243 367 1161 c
+367 1090 388 1034 429 993 c
+470 952 558 912 692 872 c
+877 817 l
+1012 776 1109 724 1166 661 c
+1224 598 1253 514 1253 408 c
+1253 263 1207 154 1114 81 c
+1021 8 883 -29 700 -29 c
+618 -29 534 -21 449 -4 c
+364 13 278 38 190 72 c
+
+ce} _d
+/T{1366 0 20 0 1346 1493 sc
+391 0 m
+391 106 l
+582 106 l
+582 1374 l
+143 1374 l
+143 1141 l
+20 1141 l
+20 1493 l
+1346 1493 l
+1346 1141 l
+1223 1141 l
+1223 1374 l
+784 1374 l
+784 106 l
+975 106 l
+975 0 l
+391 0 l
+
+ce} _d
+/multiply{1716 0 283 68 1434 1217 sc
+1434 1104 m
+971 641 l
+1434 180 l
+1319 68 l
+858 528 l
+397 68 l
+283 180 l
+743 641 l
+283 1104 l
+397 1217 l
+858 756 l
+1319 1217 l
+1434 1104 l
+
+ce} _d
+/a{1221 0 102 -29 1163 1092 sc
+815 334 m
+815 559 l
+578 559 l
+487 559 419 539 374 500 c
+329 461 307 400 307 319 c
+307 245 330 186 375 143 c
+420 100 482 78 559 78 c
+636 78 697 102 744 149 c
+791 196 815 258 815 334 c
+
+999 664 m
+999 106 l
+1163 106 l
+1163 0 l
+815 0 l
+815 115 l
+774 66 727 29 674 6 c
+621 -17 558 -29 487 -29 c
+369 -29 275 2 206 65 c
+137 128 102 212 102 319 c
+102 429 142 514 221 575 c
+300 636 412 666 557 666 c
+815 666 l
+815 739 l
+815 820 790 882 741 926 c
+692 971 624 993 535 993 c
+462 993 403 976 360 943 c
+317 910 290 860 279 795 c
+184 795 l
+184 1010 l
+248 1037 310 1058 370 1071 c
+431 1085 490 1092 547 1092 c
+694 1092 806 1055 883 982 c
+960 909 999 803 999 664 c
+
+ce} _d
+/b{1311 0 59 -29 1208 1556 sc
+236 106 m
+236 1450 l
+59 1450 l
+59 1556 l
+420 1556 l
+420 897 l
+456 964 502 1013 557 1044 c
+613 1076 682 1092 764 1092 c
+895 1092 1001 1040 1084 937 c
+1167 834 1208 699 1208 532 c
+1208 365 1167 230 1084 126 c
+1001 23 895 -29 764 -29 c
+682 -29 613 -13 557 18 c
+502 50 456 99 420 166 c
+420 0 l
+59 0 l
+59 106 l
+236 106 l
+
+420 479 m
+420 351 444 253 493 186 c
+542 119 614 86 707 86 c
+801 86 872 124 920 199 c
+969 274 993 385 993 532 c
+993 679 969 790 920 865 c
+872 940 801 977 707 977 c
+614 977 542 943 493 876 c
+444 809 420 711 420 584 c
+420 479 l
+
+ce} _d
+/e{1212 0 102 -29 1110 1092 sc
+1110 512 m
+317 512 l
+317 504 l
+317 361 344 252 398 179 c
+452 106 532 70 637 70 c
+718 70 784 91 835 133 c
+887 176 923 239 944 322 c
+1092 322 l
+1063 205 1008 118 929 59 c
+850 0 747 -29 618 -29 c
+463 -29 338 22 243 124 c
+149 227 102 363 102 532 c
+102 700 148 835 241 938 c
+334 1041 455 1092 606 1092 c
+767 1092 890 1042 976 943 c
+1062 844 1107 701 1110 512 c
+
+893 618 m
+889 742 863 835 814 898 c
+766 961 697 993 606 993 c
+521 993 455 961 406 898 c
+357 835 328 741 317 618 c
+893 618 l
+
+ce} _d
+/f{758 0 74 0 881 1556 sc
+881 1305 m
+784 1305 l
+783 1355 769 1393 741 1419 c
+714 1445 674 1458 621 1458 c
+552 1458 504 1439 476 1401 c
+448 1364 434 1297 434 1200 c
+434 1063 l
+731 1063 l
+731 956 l
+434 956 l
+434 106 l
+670 106 l
+670 0 l
+74 0 l
+74 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+250 1063 l
+250 1196 l
+250 1315 281 1404 342 1465 c
+404 1526 495 1556 614 1556 c
+659 1556 703 1552 748 1544 c
+793 1536 837 1524 881 1507 c
+881 1305 l
+
+ce} _d
+/g{1311 0 102 -455 1251 1092 sc
+1075 956 m
+1075 23 l
+1075 -130 1033 -247 949 -330 c
+865 -413 745 -455 590 -455 c
+520 -455 453 -449 389 -436 c
+325 -423 264 -404 205 -379 c
+205 -156 l
+301 -156 l
+313 -225 341 -276 386 -308 c
+431 -340 495 -356 578 -356 c
+686 -356 765 -325 815 -264 c
+866 -203 891 -108 891 23 c
+891 166 l
+855 99 809 50 753 18 c
+698 -13 629 -29 547 -29 c
+416 -29 309 23 226 126 c
+143 230 102 365 102 532 c
+102 699 143 834 226 937 c
+309 1040 416 1092 547 1092 c
+629 1092 698 1076 753 1044 c
+809 1013 855 964 891 897 c
+891 1063 l
+1251 1063 l
+1251 956 l
+1075 956 l
+
+891 584 m
+891 711 866 809 817 876 c
+768 943 697 977 604 977 c
+509 977 438 940 389 865 c
+341 790 317 679 317 532 c
+317 385 341 274 389 199 c
+438 124 509 86 604 86 c
+697 86 768 119 817 186 c
+866 253 891 351 891 479 c
+891 584 l
+
+ce} _d
+/i{655 0 74 0 608 1507 sc
+199 1393 m
+199 1424 210 1450 232 1473 c
+255 1496 282 1507 313 1507 c
+344 1507 370 1496 392 1473 c
+415 1450 426 1424 426 1393 c
+426 1362 415 1335 393 1313 c
+371 1291 344 1280 313 1280 c
+282 1280 255 1291 232 1313 c
+210 1335 199 1362 199 1393 c
+
+434 106 m
+608 106 l
+608 0 l
+74 0 l
+74 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+434 1063 l
+434 106 l
+
+ce} _d
+/l{655 0 59 0 594 1556 sc
+420 106 m
+594 106 l
+594 0 l
+59 0 l
+59 106 l
+236 106 l
+236 1450 l
+59 1450 l
+59 1556 l
+420 1556 l
+420 106 l
+
+ce} _d
+/m{1942 0 74 0 1886 1092 sc
+1061 856 m
+1096 934 1141 993 1196 1032 c
+1251 1072 1316 1092 1389 1092 c
+1500 1092 1583 1057 1638 988 c
+1693 919 1720 815 1720 676 c
+1720 106 l
+1886 106 l
+1886 0 l
+1376 0 l
+1376 106 l
+1536 106 l
+1536 655 l
+1536 764 1520 841 1488 887 c
+1456 933 1403 956 1329 956 c
+1247 956 1184 925 1141 863 c
+1098 801 1077 711 1077 592 c
+1077 106 l
+1237 106 l
+1237 0 l
+733 0 l
+733 106 l
+893 106 l
+893 662 l
+893 768 877 843 845 888 c
+813 933 760 956 686 956 c
+604 956 541 925 498 863 c
+455 801 434 711 434 592 c
+434 106 l
+594 106 l
+594 0 l
+84 0 l
+84 106 l
+250 106 l
+250 958 l
+74 958 l
+74 1063 l
+434 1063 l
+434 874 l
+468 945 511 999 564 1036 c
+617 1073 676 1092 743 1092 c
+826 1092 895 1071 950 1030 c
+1005 989 1042 931 1061 856 c
+
+ce} _d
+/n{1319 0 74 0 1262 1092 sc
+84 0 m
+84 106 l
+250 106 l
+250 956 l
+74 956 l
+74 1063 l
+434 1063 l
+434 874 l
+468 946 512 1000 566 1037 c
+621 1074 684 1092 756 1092 c
+873 1092 960 1058 1015 991 c
+1070 924 1098 819 1098 676 c
+1098 106 l
+1262 106 l
+1262 0 l
+754 0 l
+754 106 l
+913 106 l
+913 618 l
+913 748 897 837 865 885 c
+833 934 777 958 696 958 c
+611 958 546 927 501 864 c
+456 802 434 711 434 592 c
+434 106 l
+594 106 l
+594 0 l
+84 0 l
+
+ce} _d
+/o{1233 0 102 -29 1130 1092 sc
+616 70 m
+715 70 789 109 839 187 c
+890 265 915 380 915 532 c
+915 684 890 799 839 876 c
+789 954 715 993 616 993 c
+517 993 443 954 392 876 c
+342 799 317 684 317 532 c
+317 380 342 265 393 187 c
+444 109 518 70 616 70 c
+
+616 -29 m
+461 -29 337 22 243 124 c
+149 227 102 363 102 532 c
+102 701 149 837 242 939 c
+336 1041 461 1092 616 1092 c
+771 1092 896 1041 989 939 c
+1083 837 1130 701 1130 532 c
+1130 363 1083 227 989 124 c
+896 22 771 -29 616 -29 c
+
+ce} _d
+/p{1311 0 59 -426 1208 1092 sc
+420 584 m
+420 479 l
+420 351 444 253 493 186 c
+542 119 614 86 707 86 c
+801 86 872 124 920 199 c
+969 274 993 385 993 532 c
+993 679 969 790 920 865 c
+872 940 801 977 707 977 c
+614 977 542 943 493 876 c
+444 809 420 711 420 584 c
+
+236 956 m
+59 956 l
+59 1063 l
+420 1063 l
+420 897 l
+456 964 502 1013 557 1044 c
+613 1076 682 1092 764 1092 c
+895 1092 1001 1040 1084 937 c
+1167 834 1208 699 1208 532 c
+1208 365 1167 230 1084 126 c
+1001 23 895 -29 764 -29 c
+682 -29 613 -13 557 18 c
+502 50 456 99 420 166 c
+420 -319 l
+594 -319 l
+594 -426 l
+59 -426 l
+59 -319 l
+236 -319 l
+236 956 l
+
+ce} _d
+/r{979 0 74 0 979 1092 sc
+979 1065 m
+979 799 l
+873 799 l
+870 852 855 891 829 917 c
+803 943 765 956 715 956 c
+624 956 555 925 506 862 c
+458 799 434 709 434 592 c
+434 106 l
+647 106 l
+647 0 l
+84 0 l
+84 106 l
+250 106 l
+250 958 l
+74 958 l
+74 1063 l
+434 1063 l
+434 874 l
+470 948 516 1003 573 1038 c
+630 1074 699 1092 780 1092 c
+810 1092 841 1090 874 1085 c
+907 1080 942 1074 979 1065 c
+
+ce} _d
+/s{1051 0 115 -29 946 1092 sc
+115 59 m
+115 307 l
+221 307 l
+224 228 248 168 295 129 c
+342 90 412 70 504 70 c
+587 70 650 85 693 116 c
+736 147 758 193 758 252 c
+758 299 742 336 710 365 c
+679 394 612 424 510 457 c
+377 502 l
+286 531 219 568 178 612 c
+137 656 117 712 117 780 c
+117 877 153 954 224 1009 c
+295 1064 394 1092 520 1092 c
+576 1092 635 1085 697 1070 c
+759 1055 823 1034 889 1006 c
+889 774 l
+783 774 l
+780 843 756 896 711 935 c
+666 974 604 993 526 993 c
+449 993 390 979 350 952 c
+311 925 291 884 291 829 c
+291 784 306 748 336 721 c
+366 694 426 667 516 639 c
+662 594 l
+763 563 835 523 879 476 c
+924 429 946 369 946 295 c
+946 194 907 115 830 57 c
+753 -0 647 -29 512 -29 c
+443 -29 376 -22 311 -7 c
+246 8 180 30 115 59 c
+
+ce} _d
+/u{1319 0 55 -29 1243 1063 sc
+725 1063 m
+1069 1063 l
+1069 106 l
+1243 106 l
+1243 0 l
+885 0 l
+885 188 l
+851 117 807 63 753 26 c
+699 -11 636 -29 565 -29 c
+447 -29 360 4 304 71 c
+249 138 221 244 221 387 c
+221 956 l
+55 956 l
+55 1063 l
+406 1063 l
+406 444 l
+406 315 422 226 453 178 c
+485 130 542 106 623 106 c
+708 106 773 137 818 200 c
+863 263 885 354 885 473 c
+885 956 l
+725 956 l
+725 1063 l
+
+ce} _d
+/y{1157 0 -6 -455 1151 1063 sc
+442 -195 m
+512 -18 l
+115 956 l
+-6 956 l
+-6 1063 l
+483 1063 l
+483 956 l
+313 956 l
+612 225 l
+911 956 l
+752 956 l
+752 1063 l
+1151 1063 l
+1151 956 l
+1032 956 l
+545 -240 l
+512 -323 475 -379 434 -409 c
+393 -440 336 -455 262 -455 c
+231 -455 198 -452 165 -447 c
+132 -442 99 -434 66 -424 c
+66 -221 l
+160 -221 l
+164 -266 175 -299 194 -318 c
+213 -338 243 -348 283 -348 c
+320 -348 349 -338 371 -317 c
+394 -297 417 -256 442 -195 c
+
+ce} _d
+end readonly def
+
+/BuildGlyph {
+ exch begin
+ CharStrings exch
+ 2 copy known not {pop /.notdef} if
+ true 3 1 roll get exec
+ end
+} _d
+
+/BuildChar {
+ 1 index /Encoding get exch get
+ 1 index /BuildGlyph get exec
+} _d
+
+FontName currentdict end definefont pop
+end
+%%EndProlog
+mpldict begin
+75.6 223.2 translate
+460.8 345.6 0 0 clipbox
+gsave
+0 0 m
+460.8 0 l
+460.8 345.6 l
+0 345.6 l
+cl
+1.000 setgray
+fill
+grestore
+gsave
+108.312798 56.796 m
+433.392671 56.796 l
+433.392671 330.048 l
+108.312798 330.048 l
+cl
+1.000 setgray
+fill
+grestore
+0.800 setlinewidth
+1 setlinejoin
+1 setlinecap
+[] 0 setdash
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+108.312798 56.796 m
+108.312798 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+96.3128 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/one glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+173.328772 56.796 m
+173.328772 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+161.329 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/two glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+238.344747 56.796 m
+238.344747 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+226.345 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/three glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+303.360722 56.796 m
+303.360722 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+291.361 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/four glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+368.376696 56.796 m
+368.376696 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+356.377 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.635938 moveto
+/one glyphshow
+8.43005 0.635938 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.36094 moveto
+/five glyphshow
+grestore
+0.800 setgray
+gsave
+325.08 273.252 108.313 56.796 clipbox
+433.392671 56.796 m
+433.392671 330.048 l
+stroke
+grestore
+0.150 setgray
+gsave
+421.393 38.1991 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/one glyphshow
+8.43005 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+16.9782 5.24062 moveto
+/six glyphshow
+grestore
+/DejaVuSerif 14.400 selectfont
+gsave
+
+198.384 20.8554 translate
+0 rotate
+0 0 m /N glyphshow
+12.5781 0 m /u glyphshow
+21.8362 0 m /m glyphshow
+35.4672 0 m /b glyphshow
+44.6692 0 m /e glyphshow
+53.1763 0 m /r glyphshow
+60.0479 0 m /space glyphshow
+64.6173 0 m /o glyphshow
+73.2718 0 m /f glyphshow
+78.5922 0 m /space glyphshow
+83.1616 0 m /S glyphshow
+93.0093 0 m /a glyphshow
+101.58 0 m /m glyphshow
+115.211 0 m /p glyphshow
+124.413 0 m /l glyphshow
+129.01 0 m /e glyphshow
+137.517 0 m /s glyphshow
+grestore
+gsave
+33.7128 61.3028 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.2668 0.515625 moveto
+/two glyphshow
+23.2783 0.515625 moveto
+/multiply glyphshow
+36.9618 0.515625 moveto
+/one glyphshow
+45.3918 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.94 5.24062 moveto
+/minus glyphshow
+61.7115 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 111.767 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/four glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 158.19 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/six glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+33.7128 201.171 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/two glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.1418 0.515625 moveto
+/eight glyphshow
+23.1533 0.515625 moveto
+/multiply glyphshow
+36.8368 0.515625 moveto
+/one glyphshow
+45.2668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.815 5.24062 moveto
+/minus glyphshow
+61.5865 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+45.7128 241.186 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+11.0115 0.515625 moveto
+/multiply glyphshow
+24.6949 0.515625 moveto
+/one glyphshow
+33.125 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+41.6732 5.24062 moveto
+/minus glyphshow
+49.4446 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+33.7128 278.616 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.2668 0.515625 moveto
+/two glyphshow
+23.2783 0.515625 moveto
+/multiply glyphshow
+36.9618 0.515625 moveto
+/one glyphshow
+45.3918 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+53.94 5.24062 moveto
+/minus glyphshow
+61.7115 5.24062 moveto
+/two glyphshow
+grestore
+gsave
+32.7128 313.777 translate
+0 rotate
+/DejaVuSerif 13.200000000000001 selectfont
+0 0.515625 moveto
+/three glyphshow
+8.43005 0.515625 moveto
+/period glyphshow
+12.6418 0.515625 moveto
+/four glyphshow
+23.6533 0.515625 moveto
+/multiply glyphshow
+37.3368 0.515625 moveto
+/one glyphshow
+45.7668 0.515625 moveto
+/zero glyphshow
+/DejaVuSerif 9.24 selectfont
+54.315 5.24062 moveto
+/minus glyphshow
+62.0865 5.24062 moveto
+/two glyphshow
+grestore
+/DejaVuSerif 14.400 selectfont
+gsave
+
+25.6034 141.984 translate
+90 rotate
+0 0 m /T glyphshow
+9.58801 0 m /r glyphshow
+16.4597 0 m /a glyphshow
+25.0299 0 m /i glyphshow
+29.6274 0 m /n glyphshow
+38.8855 0 m /i glyphshow
+43.483 0 m /n glyphshow
+52.7411 0 m /g glyphshow
+61.9431 0 m /space glyphshow
+66.5125 0 m /T glyphshow
+76.1005 0 m /i glyphshow
+80.6979 0 m /m glyphshow
+94.3289 0 m /e glyphshow
+grestore
+1.200 setlinewidth
+0.122 0.467 0.706 setrgbcolor
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 308.124776 m
+stroke
+grestore
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 299.902455 m
+283.788963 317.627455 l
+stroke
+grestore
+0 setlinecap
+[4.8 1.8] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 141.977109 m
+stroke
+grestore
+1 setlinecap
+[] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 134.698126 m
+283.788963 149.137791 l
+stroke
+grestore
+0 setlinecap
+[1.2 1.2] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 81.229542 m
+stroke
+grestore
+1 setlinecap
+[] 0 setdash
+gsave
+325.08 273.252 108.313 56.796 clipbox
+283.788963 69.216545 m
+283.788963 92.851816 l
+stroke
+grestore
+1.000 setlinewidth
+0 setlinejoin
+2 setlinecap
+0.800 setgray
+gsave
+108.312798 56.796 m
+108.312798 330.048 l
+stroke
+grestore
+gsave
+433.392671 56.796 m
+433.392671 330.048 l
+stroke
+grestore
+gsave
+108.312798 56.796 m
+433.392671 56.796 l
+stroke
+grestore
+gsave
+108.312798 330.048 m
+433.392671 330.048 l
+stroke
+grestore
+0.800 setlinewidth
+0 setlinecap
+gsave
+342.834546 241.61925 m
+424.152671 241.61925 l
+425.912671 241.61925 426.792671 242.49925 426.792671 244.25925 c
+426.792671 320.808 l
+426.792671 322.568 425.912671 323.448 424.152671 323.448 c
+342.834546 323.448 l
+341.074546 323.448 340.194546 322.568 340.194546 320.808 c
+340.194546 244.25925 l
+340.194546 242.49925 341.074546 241.61925 342.834546 241.61925 c
+cl
+gsave
+1.000 setgray
+fill
+grestore
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 14.400 selectfont
+gsave
+
+359.431 307.527 translate
+0 rotate
+0 0 m /K glyphshow
+10.3641 0 m /e glyphshow
+18.8712 0 m /r glyphshow
+25.7429 0 m /n glyphshow
+35.001 0 m /e glyphshow
+43.5081 0 m /l glyphshow
+grestore
+1.200 setlinewidth
+1 setlinejoin
+1 setlinecap
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 292.766125 m
+358.674546 292.766125 l
+371.874546 292.766125 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 288.146 translate
+0 rotate
+0 0 m /r glyphshow
+6.33386 0 m /b glyphshow
+14.8157 0 m /f glyphshow
+grestore
+0 setlinecap
+[4.8 1.8] 0 setdash
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 273.588 m
+358.674546 273.588 l
+371.874546 273.588 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 268.968 translate
+0 rotate
+0 0 m /p glyphshow
+8.48181 0 m /o glyphshow
+16.459 0 m /l glyphshow
+20.6967 0 m /y glyphshow
+grestore
+[1.2 1.2] 0 setdash
+0.122 0.467 0.706 setrgbcolor
+gsave
+345.474546 254.222375 m
+358.674546 254.222375 l
+371.874546 254.222375 l
+stroke
+grestore
+0.150 setgray
+/DejaVuSerif 13.200 selectfont
+gsave
+
+382.435 249.602 translate
+0 rotate
+0 0 m /l glyphshow
+4.23767 0 m /i glyphshow
+8.47534 0 m /n glyphshow
+17.0089 0 m /e glyphshow
+24.8502 0 m /a glyphshow
+32.7498 0 m /r glyphshow
+grestore
+
+end
+showpage
diff --git a/examples/security/truthseeker/retrain.py b/examples/security/truthseeker/retrain.py
index 6b91b13c..4a0928a4 100644
--- a/examples/security/truthseeker/retrain.py
+++ b/examples/security/truthseeker/retrain.py
@@ -236,9 +236,9 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
# Parse Model Results
results = pd.read_csv("output/train.csv")
# Some convenient variable names
-# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"]
-results["Kernel"] = results["model.init.kwargs.kernel"].copy()
-# results["Features"] = results["data.generate.kwargs.n_features"].copy()
+# input_size = results["data.generate.n_samples"] * results["data.generate.n_features"]
+results["Kernel"] = results["model.init.kernel"].copy()
+# results["Features"] = results["data.generate.n_features"].copy()
# results["Samples"] = results["data.sample.train_size"].copy()
# results["input_size"] = input_size
# Clean up results
@@ -249,7 +249,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
results[col] = results[col].apply(lambda x: x[0])
# Subset results
# subset = results[results["data.sample.train_size"] == 10000]
-# subset = subset[subset["data.generate.kwargs.n_features"] == 100]
+# subset = subset[subset["data.generate.n_features"] == 100]
with open("conf/model/best_rbf.yaml", "r") as f:
best_rbf = yaml.safe_load(f)
best_rbf["init"].pop("_target_", None)
@@ -341,7 +341,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
params = json.load(f)
else:
raise ValueError(f"No params file found for {folder}")
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence"] = avg_prob
@@ -429,7 +429,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list:
else:
logger.warning(f"No params file found for {folder}")
continue
- attack_params = params["attack"]["init"]["kwargs"]
+ attack_params = params["attack"]["init"]
attack_params.update({"name": params["attack"]["init"]["name"]})
confidence_ser["Kernel"] = name
confidence_ser["Average False Confidence After Retraining"] = avg_prob