diff --git a/deckard/__main__.py b/deckard/__main__.py index cbd1505f..87a38abc 100644 --- a/deckard/__main__.py +++ b/deckard/__main__.py @@ -1,106 +1,81 @@ #!/usr/bin/env python3 -import argparse -import subprocess +import sys import logging -from pathlib import Path from omegaconf import OmegaConf -from .layers.parse import save_params_file +from .layers.afr import afr_parser, afr_main +from .layers.attack import attack_parser, attack_main +from .layers.clean_data import clean_data_parser, clean_data_main +from .layers.compile import compile_parser, compile_main +from .layers.data import data_parser, data_main +from .layers.experiment import experiment_parser, experiment_main +from .layers.find_best import find_best_parser, find_best_main +from .layers.generate_grid import generate_grid_parser, generate_grid_main +from .layers.hydra_test import hydra_test_main +from .layers.merge import merge_parser, merge_main +from .layers.optimise import optimise_main +from .layers.parse import hydra_parser, parse_hydra_config +from .layers.plots import plots_parser, plots_main +from .layers.prepare_queue import prepare_queue_main +from .layers.query_kepler import kepler_parser, kepler_main OmegaConf.register_new_resolver("eval", eval) logger = logging.getLogger(__name__) -layer_list = list(Path(Path(__file__).parent, "layers").glob("*.py")) -layer_list = [layer.stem for layer in layer_list] -if "__init__" in layer_list: - layer_list.remove("__init__") -layer_list.append(None) +layer_list = [ + "afr", + "attack", + "clean_data", + "compile", + "data", + "experiment", + "find_best", + "generate_grid", + "hydra_test", + "merge", + "optimise", + "parse", + "plots", + "prepare_queue", + "query_kepler", +] -def run_submodule(submodule, args): - if len(args) == 0: - cmd = f"python -m deckard.layers.{submodule}" - else: - cmd = f"python -m deckard.layers.{submodule} {args}" - logger.info(f"Running {cmd}") - with subprocess.Popen( - cmd, - stdout=subprocess.PIPE, -
stderr=subprocess.PIPE, - shell=True, - ) as proc: - for line in proc.stdout: - print(line.rstrip().decode("utf-8")) - if proc.returncode != 0: - logger.error(f"Error running {cmd}") - for line in proc.stderr: - logger.error(line.rstrip().decode("utf-8")) - return 1 - else: - return 0 +deckard_layer_dict = { + "afr": (afr_parser, afr_main), + "attack": (attack_parser, attack_main), + "clean_data": (clean_data_parser, clean_data_main), + "compile": (compile_parser, compile_main), + "data": (data_parser, data_main), + "experiment": (experiment_parser, experiment_main), + "find_best": (find_best_parser, find_best_main), + "generate_grid": (generate_grid_parser, generate_grid_main), + "hydra_test": (None, hydra_test_main), + "merge": (merge_parser, merge_main), + "optimise": (None, optimise_main), + "parse": (hydra_parser, parse_hydra_config), + "plots": (plots_parser, plots_main), + "prepare_queue": (None, prepare_queue_main), + "query_kepler": (kepler_parser, kepler_main), +} +assert len(deckard_layer_dict) == len( + layer_list, +), "Some layers are missing from the deckard_layer_dict" -def parse_and_repro(args, default_config="default.yaml", config_dir="conf"): - if len(args) == 0: - assert ( - save_params_file( - config_dir=( - Path(Path(), config_dir) - if not Path(config_dir).is_absolute() - else Path(config_dir) - ), - config_file=default_config, - ) - is None - ) - assert Path(Path(), "params.yaml").exists() - else: - cmd = f"python -m deckard.layers.parse {args} --config_file {default_config}" - # error = f"error parsing command: {cmd} {args}" - with subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) as proc: - for line in proc.stdout: - print(line.rstrip().decode("utf-8")) - if Path(Path(), "dvc.yaml").exists(): - cmd = "dvc repro" - with subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) as proc: - for line in proc.stdout: - print(line.rstrip().decode("utf-8")) - - else: - raise ValueError("No dvc.yaml file found. 
Please construct a pipeline.") - return 0 +def main(layer, args): + # Get the layer and the main function for the layer. + if layer not in deckard_layer_dict: + raise ValueError(f"Layer {layer} not found.") + parser, sub_main = deckard_layer_dict[layer] + # Layers registered without a parser read sys.argv themselves. + if parser is None: + return sub_main() + # args is sys.argv minus the layer name; skip the script name in args[0]. + return sub_main(parser.parse_args(args[1:])) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - parser = argparse.ArgumentParser() - parser.add_argument( - "--submodule", - type=str, - help=f"Submodule to run. Choices: {layer_list}", - ) - parser.add_argument( - "--config_file", - type=str, - help="default hydra configuration file that you would like to reproduce with dvc repro.", - ) - parser.add_argument("--config_dir", type=str, default="conf") - parser.add_argument("other_args", type=str, nargs="*") - args = parser.parse_args() - submodule = args.submodule - if submodule is not None: - assert ( - args.config_file is None - ), "config_file and submodule cannot be specified at the same time" - if submodule not in layer_list and submodule is not None: - raise ValueError(f"Submodule {submodule} not found.
Choices: {layer_list}") - if len(args.other_args) > 0: - other_args = " ".join(args.other_args) - else: - other_args = [] - if submodule is None: - assert ( - parse_and_repro(other_args, args.config_file, config_dir=args.config_dir) - == 0 - ) - else: - assert run_submodule(submodule, other_args) == 0 + # pop the first argument which is the script name + layer = sys.argv.pop(1) + # pass the rest of the arguments to the main function + main(layer, sys.argv) diff --git a/deckard/layers/afr.py b/deckard/layers/afr.py index 41c7c4dc..c69e7887 100644 --- a/deckard/layers/afr.py +++ b/deckard/layers/afr.py @@ -28,6 +28,14 @@ logger = logging.getLogger(__name__) +__all__ = [ + "afr_main", + "survival_probability_calibration", + "fit_aft", + "plot_aft", + "afr_parser", +] + # Modified from https://github.com/CamDavidsonPilon/lifelines/blob/master/lifelines/calibration.py def survival_probability_calibration( @@ -872,7 +880,7 @@ def calculate_raw_failures(args, data, config): return data -def main(args): +def afr_main(args): target = args.target duration_col = args.duration_col dataset = args.dataset @@ -929,4 +937,4 @@ def main(args): afr_parser.add_argument("--config_file", type=str, default="afr.yaml") afr_parser.add_argument("--plots_folder", type=str, default="plots") args = afr_parser.parse_args() - main(args) + afr_main(args) diff --git a/deckard/layers/clean_data.py b/deckard/layers/clean_data.py index 9fdd30d9..615a563b 100644 --- a/deckard/layers/clean_data.py +++ b/deckard/layers/clean_data.py @@ -478,7 +478,9 @@ def replace_strings_in_data(data, replace_dict): v, dict, ), f"Value for key {k} in replace_dict is not a dictionary." - assert k in data.columns, f"Key {k} not in data.columns." + if k not in data.columns: + logger.warning(f"Column {k} not in data. 
Ignoring.") + continue for k1, v1 in v.items(): logger.info(f"Replacing {k1} with {v1} in {k}...") k1 = str(k1) @@ -610,41 +612,41 @@ def drop_values(data, drop_dict): return data -parser = argparse.ArgumentParser() -parser.add_argument( +clean_data_parser = argparse.ArgumentParser() +clean_data_parser.add_argument( "-i", "--input_file", type=str, help="Data file to read from", required=True, ) -parser.add_argument( +clean_data_parser.add_argument( "-o", "--output_file", type=str, help="Data file to read from", required=True, ) -parser.add_argument( +clean_data_parser.add_argument( "-v", "--verbosity", default="INFO", help="Increase output verbosity", ) -parser.add_argument( +clean_data_parser.add_argument( "-c", "--config", help="Path to the config file", default="clean.yaml", ) -parser.add_argument( +clean_data_parser.add_argument( "-s", "--subset", help="Subset of data you would like to plot", default=None, nargs="?", ) -parser.add_argument( +clean_data_parser.add_argument( "-d", "--drop_if_empty", help="Drop row if this columns is empty", @@ -656,14 +658,14 @@ def drop_values(data, drop_dict): "predict_time", ], ) -parser.add_argument( +clean_data_parser.add_argument( "--pareto_dict", help="Path to (optional) pareto set dictionary.", default=None, ) -def main(args): +def clean_data_main(args): logging.basicConfig(level=args.verbosity) assert Path( args.input_file, @@ -726,5 +728,5 @@ def main(args): if __name__ == "__main__": - args = parser.parse_args() - main(args) + args = clean_data_parser.parse_args() + clean_data_main(args) diff --git a/deckard/layers/compile.py b/deckard/layers/compile.py index 4a33e818..28a33a56 100644 --- a/deckard/layers/compile.py +++ b/deckard/layers/compile.py @@ -4,6 +4,7 @@ import logging from tqdm import tqdm import yaml +import argparse logger = logging.getLogger(__name__) @@ -172,13 +173,13 @@ def load_results(results_file, results_folder) -> pd.DataFrame: Path(results_folder).mkdir(exist_ok=True, parents=True) suffix = 
results_file.suffix if suffix == ".csv": - results = pd.read_csv(results_file) + results = pd.read_csv(results_file, index_col=0) elif suffix == ".xlsx": - results = pd.read_excel(results_file) + results = pd.read_excel(results_file, index_col=0) elif suffix == ".html": - results = pd.read_html(results_file) + results = pd.read_html(results_file, index_col=0) elif suffix == ".json": results = pd.read_json(results_file) elif suffix == ".tex": pd.read_csv( results_file, @@ -187,6 +188,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame: skiprows=4, skipfooter=3, engine="python", + index_col=0, ) else: raise ValueError(f"File type {suffix} not supported.") @@ -196,16 +198,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame: return results -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--results_file", type=str, default="results.csv") - parser.add_argument("--report_folder", type=str, default="reports", required=True) - parser.add_argument("--results_folder", type=str, default=".") - parser.add_argument("--exclude", type=list, default=None, nargs="*") - parser.add_argument("--verbose", type=str, default="INFO") - args = parser.parse_args() +def compile_main(parse_results, save_results, args): logging.basicConfig(level=args.verbose) report_folder = args.report_folder results_file = args.results_file @@ -215,3 +208,20 @@ def load_results(results_file, results_folder) -> pd.DataFrame: assert Path( report_file, ).exists(), f"Results file {report_file} does not exist. Something went wrong."
+ + +compile_parser = argparse.ArgumentParser() +compile_parser.add_argument("--results_file", type=str, default="results.csv") +compile_parser.add_argument( + "--report_folder", + type=str, + default="reports", + required=True, +) +compile_parser.add_argument("--results_folder", type=str, default=".") +compile_parser.add_argument("--exclude", type=list, default=None, nargs="*") +compile_parser.add_argument("--verbose", type=str, default="INFO") + +if __name__ == "__main__": + args = compile_parser.parse_args() + compile_main(parse_results, save_results, args) diff --git a/deckard/layers/deploy.py b/deckard/layers/deploy.py deleted file mode 100644 index a1fe99ed..00000000 --- a/deckard/layers/deploy.py +++ /dev/null @@ -1,23 +0,0 @@ -import logging -import argparse -from pathlib import Path -import yaml -from ..iaac import GCP_Config - - -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) -if __name__ == "__main__": - iaac_parser = argparse.ArgumentParser() - iaac_parser.add_argument("--verbosity", type=str, default="INFO") - iaac_parser.add_argument("--config_dir", type=str, default="conf/deploy") - iaac_parser.add_argument("--config_file", type=str, default="default.yaml") - iaac_parser.add_argument("--workdir", type=str, default=".") - args = iaac_parser.parse_args() - config_dir = Path(args.workdir, args.config_dir).resolve().as_posix() - config_file = Path(config_dir, args.config_file).resolve().as_posix() - with open(config_file, "r") as f: - params = yaml.load(f, Loader=yaml.FullLoader) - gcp = GCP_Config(**params) - logging.basicConfig(level=args.verbosity) - assert gcp() is None, "Error creating cluster" diff --git a/deckard/layers/find_best.py b/deckard/layers/find_best.py index 9cb34315..7cebd456 100644 --- a/deckard/layers/find_best.py +++ b/deckard/layers/find_best.py @@ -25,6 +25,19 @@ def find_optuna_best( ): logger.info(f"Study name: {study_name}") logger.info(f"Storage name: {storage_name}") + # Validate the directions + 
if isinstance(direction, str): + directions = [direction] + else: + directions = direction + assert isinstance(directions, list), ( + f"Directions is not a list: {type(directions)}" + ) + for direction_ in directions: + assert direction_ in [ + "minimize", + "maximize", + ], f"Direction {direction_} not recognized." if isinstance(direction, str): study = optuna.create_study( study_name=study_name, @@ -41,9 +54,67 @@ def find_optuna_best( directions=direction, ) directions = direction - assert isinstance(directions, list), f"Directions is not a list: {type(directions)}" + # Convert directions to bools + directions = [False if x == "maximize" else True for x in directions] + # Get the trials dataframe df = study.trials_dataframe(attrs=("number", "value", "params")) # Find the average of each value over the columns in average_over + # df = group_by_params(df) + if study_csv is not None: + Path(study_csv).parent.mkdir(parents=True, exist_ok=True) + df.to_csv(study_csv) + # To dotlist + params = merge_best_with_default( + config_folder, + default_config, + config_subdir, + study, + ) + if params_file is not None: + params_file = create_new_config_in_subdir( + params_file, + config_folder, + default_config, + config_subdir, + params, + ) + return params + + +def merge_best_with_default( + config_folder, + default_config, + config_subdir, + study, + use_optuna_best=True, +): + if use_optuna_best is True: + best_params = flatten_dict(study.best_params) + more_params = flatten_dict(study.best_trial.user_attrs) + even_more_params = flatten_dict(study.best_trial.system_attrs) + logger.debug(f"Best params: {best_params}") + logger.debug(f"Best user params: {more_params}") + logger.debug(f"Best system params: {even_more_params}") + else: + raise NotImplementedError("Not implemented yet.") + # Merge all the params + best_params = OmegaConf.to_container( + OmegaConf.merge(best_params, more_params, even_more_params), + resolve=False, + ) + # to dotlist + best_params = flatten_dict(best_params) + overrides =
get_overrides(config_subdir, best_params) + params = override_default_with_best( + config_folder, + default_config, + overrides, + config_subdir=config_subdir, + ) + return params + + +def group_by_params(df): not_these = ["number", "value"] val_cols = [ col @@ -51,11 +122,9 @@ def find_optuna_best( if col.startswith("values_") and col.split("values_")[-1] not in not_these ] not_these.extend(val_cols) - print(f"Not these: {not_these}") groupby_cols = [ col for col in df.columns if col.split("params_")[-1] not in not_these ] - print(f"Groupby cols: {groupby_cols}") dfs = df.groupby(groupby_cols) new_df = pd.DataFrame(columns=groupby_cols + ["mean", "std", "ntrials", "nuniques"]) means = [] @@ -82,30 +151,11 @@ def find_optuna_best( new_df["std"] = stds new_df["ntrials"] = ntrials new_df["nuniques"] = nuniques - for direction in directions: - assert direction in [ - "minimize", - "maximize", - ], f"Direction {direction} not recognized." - directions = [False if x == "maximize" else True for x in directions] - assert isinstance(new_df, pd.DataFrame), f"df is not a dataframe: {type(df)}" - if study_csv is not None: - Path(study_csv).parent.mkdir(parents=True, exist_ok=True) - df.to_csv(study_csv) - # To dotlist - best_params = flatten_dict(study.best_params) - more_params = flatten_dict(study.best_trial.user_attrs) - even_more_params = flatten_dict(study.best_trial.system_attrs) - logger.debug(f"Best params: {best_params}") - logger.debug(f"Best user params: {more_params}") - logger.debug(f"Best system params: {even_more_params}") - # Merge all the params - best_params = OmegaConf.to_container( - OmegaConf.merge(best_params, more_params, even_more_params), - resolve=False, - ) - # to dotlist - best_params = flatten_dict(best_params) + assert isinstance(new_df, pd.DataFrame), f"df is not a dataframe: {type(new_df)}" + return new_df + + +def get_overrides(config_subdir, best_params): overrides = [] # Changing the keys to hydra override format for key, value in 
best_params.items(): @@ -130,21 +180,7 @@ def find_optuna_best( logger.info(f"Adding {key} to param list") else: logger.debug(f"Skipping {key} because it is not in {config_subdir}") - params = override_default_with_best( - config_folder, - default_config, - overrides, - config_subdir=config_subdir, - ) - if params_file is not None: - params_file = create_new_config_in_subdir( - params_file, - config_folder, - default_config, - config_subdir, - params, - ) - return params + return overrides def create_new_config_in_subdir( @@ -176,7 +212,6 @@ def create_new_config_in_subdir( with open(params_file.with_suffix(".yaml"), "w") as f: yaml.dump(params, f) assert params_file.exists(), f"{params_file.resolve().as_posix()} does not exist." - return params_file @@ -195,27 +230,25 @@ def override_default_with_best( return cfg -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--params_file", type=str, default=True) - - parser.add_argument("--study_csv", type=str, default=None) - parser.add_argument("--config_folder", type=str, default=Path(Path(), "conf")) - parser.add_argument("--default_config", type=str, default="default") - parser.add_argument("--config_subdir", type=str, default=None) - parser.add_argument("--study_name", type=str, required=True) - parser.add_argument("--config_name", type=str) - parser.add_argument("--verbosity", type=str, default="INFO") - parser.add_argument("--storage_name", type=str, required=True) - parser.add_argument("--direction", type=str, default="maximize") - parser.add_argument("--study_type", type=str, default="optuna") - args = parser.parse_args() +find_best_parser = argparse.ArgumentParser() +find_best_parser.add_argument("--params_file", type=str, default=True) +find_best_parser.add_argument("--study_csv", type=str, default=None) +find_best_parser.add_argument("--config_folder", type=str, default=Path(Path(), "conf")) +find_best_parser.add_argument("--default_config", type=str, default="default") 
+find_best_parser.add_argument("--config_subdir", type=str, default=None) +find_best_parser.add_argument("--study_name", type=str, required=True) +find_best_parser.add_argument("--config_name", type=str) +find_best_parser.add_argument("--verbosity", type=str, default="INFO") +find_best_parser.add_argument("--storage_name", type=str, required=True) +find_best_parser.add_argument("--direction", type=str, default="maximize") +find_best_parser.add_argument("--study_type", type=str, default="optuna") + + +def find_best_main(find_optuna_best, args): args.config_folder = Path(args.config_folder).resolve().as_posix() logging if args.study_type == "optuna": - study_name = args.study_name - storage_name = args.storage_name direction = args.direction if len(direction) == 1: direction = direction[0] @@ -231,3 +264,8 @@ def override_default_with_best( ) else: raise NotImplementedError(f"Study type {args.study_type} not implemented.") + + +if __name__ == "__main__": + args = find_best_parser.parse_args() + find_best_main(find_optuna_best, args) diff --git a/deckard/layers/generate_grid.py b/deckard/layers/generate_grid.py index 487ce801..66c9628f 100644 --- a/deckard/layers/generate_grid.py +++ b/deckard/layers/generate_grid.py @@ -4,6 +4,7 @@ import yaml from functools import reduce from operator import mul +import argparse from ..base.utils import make_grid, my_hash logger = logging.getLogger(__name__) @@ -74,13 +75,13 @@ def generate_grid_from_folders(conf_dir, regex): return big_list -def generate_queue( - conf_root, - grid_dir, - regex, - queue_folder="queue", - default_file="default.yaml", -): +def generate_grid_main(args): + conf_root = args.conf_root + grid_dir = args.grid_folder + regex = args.regex + queue_folder = args.queue_folder + default_file = args.default_file + output_file = args.output_file this_dir = os.getcwd() conf_dir = os.path.join(this_dir, conf_root, grid_dir) logger.debug(f"Looking for configs in {conf_dir}") @@ -102,12 +103,51 @@ def generate_queue( 
yaml.dump(big_list[i], outfile, default_flow_style=False) assert Path(path, name + ".yaml").exists() i += 1 + if output_file is not None: + with open(output_file, "w") as outfile: + yaml.dump(big_list, outfile, default_flow_style=False) + assert Path(output_file).exists() return big_list -conf_root = "conf" -grid_folder = "grid" -regex = "*.yaml" - -big_list = generate_queue(conf_root, grid_folder, regex) -print(yaml.dump(big_list[0])) +generate_grid_parser = argparse.ArgumentParser() +generate_grid_parser.add_argument( + "--conf_root", + type=str, + default="conf", + help="Root directory for config files", +) +generate_grid_parser.add_argument( + "--grid_folder", + type=str, + default="grid", + help="Folder containing config files", +) +generate_grid_parser.add_argument( + "--regex", + type=str, + default="*.yaml", + help="Regex for finding config files", +) +generate_grid_parser.add_argument( + "--queue_folder", + type=str, + default="queue", + help="Folder for queue files", +) +generate_grid_parser.add_argument( + "--default_file", + type=str, + default="default.yaml", + help="Default config file", +) +generate_grid_parser.add_argument( + "--output_file", + type=str, + default=None, + help="Output file for grid", +) + +if __name__ == "__main__": + args = generate_grid_parser.parse_args() + generate_grid_main(args) diff --git a/deckard/layers/generate_webpage.py b/deckard/layers/generate_webpage.py deleted file mode 100644 index bd2699c7..00000000 --- a/deckard/layers/generate_webpage.py +++ /dev/null @@ -1,63 +0,0 @@ -import os -import csv -from bs4 import BeautifulSoup - - -def generate_html_file(csv_file_path, output_folder): - # Read the CSV file - with open(csv_file_path, "r") as file: - reader = csv.reader(file) - data = list(reader) - - # Get the title of the CSV file - file_name = os.path.basename(csv_file_path) - title = os.path.splitext(file_name)[0] - - # Create an HTML file path and open the file - html_file_path = os.path.join(output_folder, 
f"{title}.html") - with open(html_file_path, "w") as html_file: - # Create a BeautifulSoup object - soup = BeautifulSoup("", "html.parser") - - # Add the title to the HTML file - soup.append(BeautifulSoup(f"

{title}

", "html.parser")) - - # Create an HTML table from the CSV data - table_html = "" - for row in data: - table_html += "" - for cell in row: - # Check if the cell is a string representing a valid path - if isinstance(cell, str) and os.path.exists(cell): - # Create a hyperlink with the capitalized name of the file - file_name = os.path.basename(cell) - link_title = os.path.splitext(file_name)[0] - cell = f'{link_title.capitalize()}' - - table_html += f"" - table_html += "" - table_html += "
{cell}
" - - # Add the table to the HTML file - soup.append(BeautifulSoup(table_html, "html.parser")) - - # Write the HTML content to the file - html_file.write(soup.prettify()) - - -def parse_folder(folder_path): - # Create the output folder if it doesn't exist - os.makedirs(folder_path, exist_ok=True) - - # Iterate over the CSV files in the folder - for file_name in os.listdir(folder_path): - if file_name.endswith(".csv"): - csv_file_path = os.path.join(folder_path, file_name) - generate_html_file(csv_file_path, folder_path) - - -# Define the folder path containing CSV files -folder_path = "output/reports" # Update with your folder path - -# Parse the folder and generate HTML files -parse_folder(folder_path) diff --git a/deckard/layers/hydra_test.py b/deckard/layers/hydra_test.py index b21fc076..21db541a 100644 --- a/deckard/layers/hydra_test.py +++ b/deckard/layers/hydra_test.py @@ -1,17 +1,55 @@ from omegaconf import DictConfig, OmegaConf from pathlib import Path +import sys import hydra -import os -working_dir = os.getcwd() -config_path = Path(working_dir, "conf").as_posix() +working_dir = Path().cwd() +config_dir = "conf" +config_path = Path(working_dir, config_dir).as_posix() +config_file = "default" -@hydra.main(version_base=None, config_path=config_path, config_name="default") -def my_app(cfg: DictConfig) -> None: - print(OmegaConf.to_yaml(cfg)) - return 0 +def hydra_test_main(): + # Use sys calls to look for --working_dir, --config_dir, and --config_file + args = sys.argv + if "--working_dir" in args: + working_dir = args[args.index("--working_dir") + 1] + # remove working_dir from args + args.pop(args.index("--working_dir")) + args.pop(args.index(working_dir)) + else: + working_dir = Path().cwd() + if "--config_dir" in args: + config_dir = args[args.index("--config_dir") + 1] + # remove config_dir from args + args.pop(args.index("--config_dir")) + args.pop(args.index(config_dir)) + else: + config_dir = "conf" + if "--config_file" in args: + config_file = 
args[args.index("--config_file") + 1] + # remove config_file from args + args.pop(args.index("--config_file")) + args.pop(args.index(config_file)) + else: + config_file = "default" + if "--version_base" in args: + version_base = args[args.index("--version_base") + 1] + # remove version_base from args + args.pop(args.index("--version_base")) + args.pop(args.index(version_base)) + else: + version_base = "1.3" + config_path = Path(working_dir, config_dir).as_posix() + hydra_kwargs = {"config_path": config_path, "config_name": config_file} + + @hydra.main(version_base=version_base, **hydra_kwargs) + def hydra_main(cfg: DictConfig) -> None: + print(OmegaConf.to_yaml(cfg)) + return 0 + + hydra_main() if __name__ == "__main__": - my_app() + hydra_test_main() diff --git a/deckard/layers/merge.py b/deckard/layers/merge.py index 13d62ea1..991b554d 100644 --- a/deckard/layers/merge.py +++ b/deckard/layers/merge.py @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) -__all__ = ["merge_csv", "main", "parser"] +__all__ = ["merge_csv", "merge_main", "merge_parser"] def merge_csv( @@ -129,7 +129,7 @@ def parse_cleaning_config(config_file, metadata_file=None, subset_metadata_file= return dict_ -def main(args): +def merge_main(args): config = parse_cleaning_config(args.config, args.metadata, args.subset_metadata) if args.output_folder is None: args.output_folder = Path().cwd() @@ -199,33 +199,33 @@ def add_subset_metadata(df, metadata_list=[]): return df -parser = argparse.ArgumentParser() -parser.add_argument( +merge_parser = argparse.ArgumentParser() +merge_parser.add_argument( "--output_file", type=str, help="Name of the output file", default="merged.csv", ) -parser.add_argument( +merge_parser.add_argument( "--output_folder", type=str, help="Name of the output folder", required=False, ) -parser.add_argument( +merge_parser.add_argument( "--smaller_file", type=str, help="Name(s) of the files to merge into the big file.", required=False, nargs="*", ) -parser.add_argument( +merge_parser.add_argument( "--config", type=str, help="Name of file containing a 'fillna' config dictionary.",
required=False, ) -parser.add_argument( +merge_parser.add_argument( "--metadata", type=str, help="Name of file containing a 'metadata' dictionary.", @@ -233,14 +233,14 @@ def add_subset_metadata(df, metadata_list=[]): # set default to --config default=None, ) -parser.add_argument( +merge_parser.add_argument( "--subset_metadata", type=str, help="Name of file containing a 'subset_metadata' dictionary.", required=False, default=None, ) -parser.add_argument( +merge_parser.add_argument( "--how", type=str, help="Type of merge to perform. Default is 'outer'.", @@ -248,5 +248,5 @@ def add_subset_metadata(df, metadata_list=[]): ) if __name__ == "__main__": - args = parser.parse_args() - main(args) + args = merge_parser.parse_args() + merge_main(args) diff --git a/deckard/layers/optimise.py b/deckard/layers/optimise.py index 9f96bd9c..9c6bfdf9 100644 --- a/deckard/layers/optimise.py +++ b/deckard/layers/optimise.py @@ -188,7 +188,7 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict: key_list.extend(new_keys) else: - raise TypeError(f"Expected str or dict, got {type(params)}") + raise TypeError(f"Expected dict, got {type(params)}") params = read_subset_of_params(key_list, params) # Load files from dvc with open(Path(path, "dvc.yaml"), "r") as f: @@ -215,7 +215,7 @@ def parse_stage(stage: str = None, params: dict = None, path=None) -> dict: if "metrics" in pipe: metric_list = [str(x).split(":")[0] for x in pipe["metrics"]] file_list.extend(metric_list) - file_string = str(file_list) + file_string = str(file_list).replace("item.", "") files = params["files"] file_list = list(files.keys()) for key in file_list: @@ -324,8 +324,8 @@ def optimise(cfg: DictConfig) -> None: logger = logging.getLogger(__name__) @hydra.main(config_path=config_path, config_name=config_name, version_base="1.3") - def hydra_optimise(cfg: DictConfig) -> float: + def optimise_main(cfg: DictConfig) -> float: score = optimise(cfg) return score - hydra_optimise() + optimise_main() 
diff --git a/deckard/layers/parse.py b/deckard/layers/parse.py index 44a2200b..3a4eec4e 100644 --- a/deckard/layers/parse.py +++ b/deckard/layers/parse.py @@ -5,6 +5,8 @@ from omegaconf import OmegaConf from .utils import save_params_file +__all__ = ["parse_hydra_config", "hydra_parser"] + logger = logging.getLogger(__name__) hydra_parser = argparse.ArgumentParser() hydra_parser.add_argument("overrides", type=str, nargs="*", default=None) diff --git a/deckard/layers/plots.py b/deckard/layers/plots.py index af653714..6e37ce7f 100644 --- a/deckard/layers/plots.py +++ b/deckard/layers/plots.py @@ -5,6 +5,7 @@ import seaborn as sns import yaml from pathlib import Path +import numpy as np logger = logging.getLogger(__name__) sns.set_theme(style="whitegrid", font_scale=1.8, font="times new roman") @@ -35,14 +36,18 @@ def cat_plot( folder, xlabels=None, ylabels=None, + xticklabels=None, + yticklabels=None, titles=None, legend_title=None, x_lim=None, y_lim=None, hue_order=None, rotation=0, - set={}, filetype=".eps", + x_scale=None, + y_scale=None, + digitize=[], **kwargs, ): """ @@ -88,12 +93,16 @@ def cat_plot( """ plt.gcf().clear() + plt.cla() + plt.clf() + # clear the Axes object suffix = Path(file).suffix if suffix is not None: file = Path(file) else: file = Path(file).with_suffix(filetype) logger.info(f"Rendering graph {file}") + data = digitize_cols(data, digitize) if hue is not None: data = data.sort_values(by=[hue, x, y]) logger.debug( @@ -112,12 +121,31 @@ def cat_plot( data = data.sort_values(by=[x, y]) logger.debug(f"Data sorted by x:{x}, y:{y}, kind:{kind}, and kwargs:{kwargs}.") graph = sns.catplot(data=data, x=x, y=y, kind=kind, **kwargs) - if xlabels is not None: - graph.set_xlabels(xlabels) - if ylabels is not None: - graph.set_ylabels(ylabels) + # graph is a FacetGrid object and we need to set the x,y scales, labels, titles on the axes + for graph_ in graph.axes.flat: + if y_scale is not None: + graph_.set_yscale(y_scale) + if x_scale is not None: + 
graph_.set_xscale(x_scale) + if xticklabels is not None: + graph_.set_xticklabels(xticklabels) + if yticklabels is not None: + graph_.set_yticklabels(yticklabels) if titles is not None: - graph.set_titles(titles) + if isinstance(titles, dict): + graph.set_titles(**titles) + elif isinstance(titles, str): + graph.set_titles(titles) + else: + try: + graph.set_titles("{row_name} | {col_name}") + except KeyError as e: + if "row_name" in str(e): + graph.set_titles("{col_name}") + elif "col_name" in str(e): + graph.set_titles("{row_name}") + else: + raise e if legend_title is not None: graph.legend.set_title(title=legend_title) else: @@ -125,8 +153,11 @@ def cat_plot( graph.legend.remove() else: pass + if xlabels is not None: + graph.set_xlabels(xlabels) + if ylabels is not None: + graph.set_ylabels(ylabels) graph.set_xticklabels(graph.axes.flat[-1].get_xticklabels(), rotation=rotation) - graph.set(**set) if x_lim is not None: graph.set(xlim=x_lim) if y_lim is not None: @@ -134,9 +165,29 @@ def cat_plot( graph.tight_layout() graph.savefig(folder / file) plt.gcf().clear() + plt.cla() + plt.clf() logger.info(f"Saved graph to {folder / file}") +def digitize_cols(data, digitize): + if isinstance(digitize, str): + digitize = [digitize] + else: + assert isinstance( + digitize, + list, + ), "digitize must be a list of columns to digitize" + if len(digitize) > 0: + for col in digitize: + min_ = data[col].min() + max_ = data[col].max() + NUMBER_OF_BINS = 10 + bins = np.linspace(min_, max_, NUMBER_OF_BINS) + data[col] = np.digitize(data[col], bins) / NUMBER_OF_BINS + return data + + def line_plot( data, x, @@ -193,6 +244,8 @@ def line_plot( the line plot graph object. 
""" plt.gcf().clear() + plt.cla() + plt.clf() suffix = Path(file).suffix if suffix is not None: file = Path(file) @@ -223,6 +276,8 @@ def line_plot( graph.get_figure().savefig(folder / file) logger.info(f"Saved graph to {folder/file}") plt.gcf().clear() + plt.cla() + plt.clf() return graph @@ -285,6 +340,8 @@ def scatter_plot( """ plt.gcf().clear() + plt.cla() + plt.clf() suffix = Path(file).suffix if suffix is not None: file = Path(file) @@ -320,38 +377,40 @@ def scatter_plot( logger.info(f"Saved graph to {Path(folder) / file}") plt.gcf().clear() + plt.cla() + plt.clf() return graph -parser = argparse.ArgumentParser() -parser.add_argument( +plots_parser = argparse.ArgumentParser() +plots_parser.add_argument( "-p", "--path", type=str, help="Path to the plot folder", required=True, ) -parser.add_argument( +plots_parser.add_argument( "-f", "--file", type=str, help="Data file to read from", required=True, ) -parser.add_argument( +plots_parser.add_argument( "-t", "--plotfiletype", type=str, help="Filetype of the plots", default=".eps", ) -parser.add_argument( +plots_parser.add_argument( "-v", "--verbosity", default="INFO", help="Increase output verbosity", ) -parser.add_argument( +plots_parser.add_argument( "-c", "--config", help="Path to the config file", @@ -359,7 +418,7 @@ def scatter_plot( ) -def main(args): +def plots_main(args): logging.basicConfig(level=args.verbosity) assert Path( args.file, @@ -390,20 +449,19 @@ def main(args): logger.info(f"Creating folder {FOLDER}") FOLDER.mkdir(parents=True, exist_ok=True) - cat_plot_list = big_dict.get("cat_plot", []) - for dict_ in cat_plot_list: - cat_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE) - line_plot_list = big_dict.get("line_plot", []) for dict_ in line_plot_list: line_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE) - scatter_plot_list = big_dict.get("scatter_plot", []) scatter_plot_list = big_dict.get("scatter_plot", []) for dict_ in scatter_plot_list: scatter_plot(data, **dict_, 
folder=FOLDER, filetype=IMAGE_FILETYPE) + cat_plot_list = big_dict.get("cat_plot", []) + for dict_ in cat_plot_list: + cat_plot(data, **dict_, folder=FOLDER, filetype=IMAGE_FILETYPE) + if __name__ == "__main__": - args = parser.parse_args() - main(args) + args = plots_parser.parse_args() + plots_main(args) diff --git a/deckard/layers/prepare_queue.py b/deckard/layers/prepare_queue.py index 6c4aeb94..ddec462d 100644 --- a/deckard/layers/prepare_queue.py +++ b/deckard/layers/prepare_queue.py @@ -1,6 +1,6 @@ import logging -import os from copy import deepcopy +import sys from pathlib import Path import yaml from hydra.utils import instantiate @@ -273,27 +273,61 @@ def prepare_experiment_folder(cfg: DictConfig) -> None: return exp, scorer, direction, folder, id_ -if __name__ == "__main__": - logger = logging.getLogger(__name__) - config_path = os.environ.pop( - "DECKARD_CONFIG_PATH", - str(Path(Path(), "conf").absolute().as_posix()), - ) - config_name = os.environ.pop("DECKARD_DEFAULT_CONFIG", "default.yaml") +def prepare_queue_main(): + # Use sys calls to look for --working_dir, --config_dir, and --config_file + args = sys.argv + global working_dir + if "--working_dir" in args: + working_dir = args[args.index("--working_dir") + 1] + # remove working_dir from args + args.pop(args.index("--working_dir")) + args.pop(args.index(working_dir)) + else: + working_dir = Path(".").cwd() + print(working_dir) + if "--config_dir" in args: + config_dir = args[args.index("--config_dir") + 1] + # remove config_dir from args + args.pop(args.index("--config_dir")) + args.pop(args.index(config_dir)) + else: + config_dir = "conf" + config_dir = Path(working_dir, config_dir).as_posix() + if "--config_file" in args: + config_file = args[args.index("--config_file") + 1] + # remove config_file from args + args.pop(args.index("--config_file")) + args.pop(args.index(config_file)) + else: + config_file = "default" + if "--version_base" in args: + version_base = args[args.index("--version_base") 
+ 1] + # remove version_base from args + args.pop(args.index("--version_base")) + args.pop(args.index(version_base)) + else: + version_base = "1.3" - @hydra.main(config_path=config_path, config_name=config_name, version_base="1.3") + @hydra.main( + config_path=config_dir, + config_name=config_file, + version_base=version_base, + ) def hydra_prepare(cfg: DictConfig) -> float: exp, scorer, direction, folder, id_ = prepare_experiment_folder(cfg) assert isinstance(exp, Experiment), f"Expected Experiment, got {type(exp)}." assert isinstance(scorer, (str, list)), f"Expected list, got {type(scorer)}." assert isinstance(direction, str), f"Expected str, got {type(direction)}." - assert direction in [ - "minimize", - "maximize", - ], f"Expected 'minimize' or 'maximize', got {direction}." + assert len(scorer) == len( + direction, + ), "Length of scorer and direction must match." assert Path( folder, ).exists(), f"Folder {folder} does not exist for experiment {id_}." return 0 hydra_prepare() + + +if __name__ == "__main__": + prepare_queue_main() diff --git a/deckard/layers/query_kepler.py b/deckard/layers/query_kepler.py index deb310b2..fe67fae3 100644 --- a/deckard/layers/query_kepler.py +++ b/deckard/layers/query_kepler.py @@ -1,8 +1,15 @@ -import logging from datetime import datetime -import pandas as pd import argparse -from prometheus_api_client import PrometheusConnect +import logging +import sys +from dataclasses import dataclass +import pandas as pd + +try: + from prometheus_api_client import PrometheusConnect +except ImportError: + ImportError("Please install prometheus_api_client") + sys.exit(1) v100 = 250 / 3600 @@ -10,6 +17,7 @@ l4 = 72 / 3600 +@dataclass class PromQuery: def __init__(self): self.prom_host = "34.147.65.220" @@ -60,7 +68,15 @@ def caluculate_minutes(self): return str(int(self.total / 60)) + "m" -def run_query(input_file, output_file): +def kepler_main(args): + input_file = args.input_file + output_file = args.output_file + logging.basicConfig( + 
level=args.verbosity, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + logger = logging.getLogger(__name__) + logger.info("Quering the Prometheus for power metrics") new_columns = [ "train_power", "predict_power", @@ -109,21 +125,12 @@ def run_query(input_file, output_file): data.to_csv(output_file) -if __name__ == "__main__": - logger = logging.getLogger(__name__) - dvc_parser = argparse.ArgumentParser() - dvc_parser.add_argument("--input_file", type=str, default=None) - dvc_parser.add_argument("--output_file", type=str, default=None) - dvc_parser.add_argument("--verbosity", type=str, default="INFO") - - args = dvc_parser.parse_args() - input_file = args.input_file - output_file = args.output_file +kepler_parser = argparse.ArgumentParser() +kepler_parser.add_argument("--input_file", type=str, default=None) +kepler_parser.add_argument("--output_file", type=str, default=None) +kepler_parser.add_argument("--verbosity", type=str, default="INFO") - logging.basicConfig( - level=args.verbosity, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - ) - logger.info("Quering the Prometheus for power metrics") - results = run_query(input_file=input_file, output_file=output_file) +if __name__ == "__main__": + args = kepler_parser.parse_args() + results = kepler_main(args) diff --git a/examples/classification/plots.ipynb b/examples/classification/plots.ipynb deleted file mode 100644 index 1ef9111e..00000000 --- a/examples/classification/plots.ipynb +++ /dev/null @@ -1,252 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import seaborn as sns\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "\n", - "# Load data\n", - "df = pd.read_csv(\"output/attack.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "dict_keys(['attacks', 'defences', 'params'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_3723846/651469242.py:12: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " attack_results['Kernel'] = attack_results['model.init.kwargs.kernel']\n" - ] - } - ], - "source": [ - "from deckard.layers.compile import clean_data_for_plotting\n", - "import yaml\n", - "\n", - "with open(\"conf/compile.yaml\", \"r\") as f:\n", - " config = yaml.load(f, Loader=yaml.FullLoader)\n", - "print(config.keys())\n", - "def_gen_dict = config[\"defences\"]\n", - "atk_gen_dict = config[\"attacks\"]\n", - "control_dict = config[\"params\"]\n", - "\n", - "df = clean_data_for_plotting(df, def_gen_dict, atk_gen_dict, control_dict)\n", - "attack_results = df.dropna(subset=[\"accuracy\", \"adv_accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots(2, 2)\n", - "graph5 = sns.lineplot(\n", - " x=\"attack.init.kwargs.eps\",\n", - " y=\"accuracy\",\n", - " data=attack_results,\n", - " style=\"model.init.kwargs.kernel\",\n", - " ax=ax[0, 0],\n", - " legend=False,\n", - " color=\"darkred\",\n", - " style_order=[\"rbf\", \"poly\", \"linear\"],\n", - ")\n", - "graph5.set(xscale=\"log\", xlabel=\"Perturbation Distance\", ylabel=\"Accuracy\")\n", - "graph6 = sns.lineplot(\n", - " x=\"attack.init.kwargs.eps_step\",\n", - " y=\"accuracy\",\n", - " data=attack_results,\n", - " style=\"model.init.kwargs.kernel\",\n", - " ax=ax[0, 1],\n", - " color=\"darkred\",\n", - " style_order=[\"rbf\", \"poly\", \"linear\"],\n", - ")\n", - "graph6.set(xscale=\"log\", xlabel=\"Perturbation Step\", ylabel=\"Accuracy\")\n", - "graph7 = sns.lineplot(\n", - " x=\"attack.init.kwargs.max_iter\",\n", - " y=\"accuracy\",\n", - " data=attack_results,\n", - " style=\"Kernel\",\n", - " ax=ax[1, 0],\n", - " legend=False,\n", - " color=\"darkred\",\n", - " style_order=[\"rbf\", \"poly\", \"linear\"],\n", - ")\n", - "graph7.set(xscale=\"log\", xlabel=\"Maximum Iterations\", ylabel=\"Accuracy\")\n", - "graph8 = sns.lineplot(\n", - " x=\"attack.init.kwargs.batch_size\",\n", - " y=\"accuracy\",\n", - " data=attack_results,\n", - " style=\"Kernel\",\n", - " ax=ax[1, 1],\n", - " legend=False,\n", - " color=\"darkred\",\n", - " style_order=[\"rbf\", \"poly\", \"linear\"],\n", - ")\n", - "graph8.set(xscale=\"log\", xlabel=\"Batch Size\", ylabel=\"Accuracy\")\n", - "graph6.legend(loc=\"center left\", bbox_to_anchor=(1, 0.5), ncol=1, title=\"Kernel\")\n", - "fig.tight_layout()\n", - "fig.savefig(\"plots/accuracy_vs_attack_parameters.pdf\")\n", - "plt.gcf().clear()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - 
"output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.lineplot(\n", - " data=df,\n", - " y=\"adv_fit_time\",\n", - " x=\"attack.init.kwargs.eps\",\n", - " hue=\"model.init.kwargs.kernel\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGxCAYAAACXwjeMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAADRbklEQVR4nOydd3zU9f3Hn9/v9/bIHpAQIGxkiggiqDhaREVxVmvdWlv3Xq3bal2ttfanVqto1WpbB666EBw4EJElyExY2esut+++3+/vj0sOQhLIuOTuks/z8QjkvvN9l+T7fX3fU9J1XUcgEAgEAoEgRZETbYBAIBAIBAJBdxBiRiAQCAQCQUojxIxAIBAIBIKURogZgUAgEAgEKY0QMwKBQCAQCFIaIWYEAoFAIBCkNELMCAQCgUAgSGmEmBEIBAKBQJDSGBJtQE+jaRplZWU4nU4kSUq0OQKBQCAQCDqArus0NjZSUFCALO/b99LnxUxZWRlFRUWJNkMgEAgEAkEX2LFjB4MGDdrnNn1ezDidTiD6YaSlpSXYGoFAIBAIBB3B7XZTVFQUu4/viz4vZppDS2lpaULMCAQCgUCQYnQkRUQkAAsEAoFAIEhphJgRCAQCgUCQ0ggxIxAIBAKBIKXp8zkzAoFA0NdQVZVwOJxoMwSCbmE0GlEUJS7HEmJGIBAIUgRd16moqKChoSHRpggEcSEjI4MBAwZ0uw+cEDMCgUCQIjQLmby8PGw2m2gEKkhZdF3H5/NRVVUFwMCBA7t1PCFmBAKBIAVQVTUmZLKzsxNtjkDQbaxWKwBVVVXk5eV1K+QkEoAFAoEgBWjOkbHZbAm2RCCIH82/z93NARNiRiAQCFIIEVoS9CXi9fssxIxAIBAIOszs2bO55pprOrz9ggULyMjIaHd9aWkpkiSxcuXKDh/zrrvuYvLkyR3evpnO2p4qJPP7Gjp0KI899liPn0eIGYFAIBAkjKKiIsrLyxk/fnyH97nhhhtYtGhR7PX555/P/Pnze8A6QaogxIxAIBAIEoaiKAwYMACDoeP1KA6HIyWToEOhUKJN6BCpYueeCDEjEAgEfYDZs2dz5ZVXcs0115CZmUl+fj7PPPMMXq+XCy64AKfTyYgRI/jf//4X2+ezzz5j2rRpmM1mBg4cyC233EIkEomt93q9nHvuuTgcDgYOHMijjz7a6rzBYJAbbriBwsJC7HY706dPZ8mSJR22e+8w05IlS5AkiUWLFjF16lRsNhuHHnooGzZsiO2zZ5jprrvu4oUXXmDhwoVIkoQkSR0+/3vvvUd6ejovv/wya9euRZZlqqurAairq0OWZc4888zY9vfddx+zZs0CotVlF110EcXFxVitVkaPHs1f/vKXFsdv9hj94Q9/oKCggNGjRwPw1VdfMXnyZCwWC1OnTuWtt95q8RnU19dz9tlnk5ubi9VqZeTIkTz//PMd/kz3fF8AO3bs4IwzziAjI4OsrCxOOukkSktL92ln88/ljTfe4Mgjj8Rmsz
Fp0iS+/vrrFuf68ssvOeyww7BarRQVFXHVVVfh9Xo7bGu8EGKmG2iqRjioJtoMgUAgAOCFF14gJyeHZcuWceWVV/Lb3/6W008/nUMPPZQVK1bw85//nHPOOQefz8euXbs47rjjOPjgg1m1ahVPPvkk//jHP7jvvvtix7vxxhv57LPPWLhwIR999BFLlixhxYoVLc55xRVX8PXXX/Pqq6+yevVqTj/9dI499lg2bdrUrffyu9/9jkcffZTly5djMBi48MIL29zuhhtu4IwzzuDYY4+lvLyc8vJyDj300P0e/5VXXuGss87i5Zdf5uyzz2bcuHFkZ2fz2WefAfDFF1+0eA1R8Td79mwANE1j0KBB/Oc//2HdunXccccd3Hbbbfz73/9ucZ5FixaxYcMGPv74Y959913cbjfz5s1jwoQJrFixgnvvvZebb765xT63334769at43//+x/r16/nySefJCcnp0Of297vKxwOM2fOHJxOJ1988QVLly7F4XBw7LHHtvDA7G1nM7/73e+44YYbWLlyJaNGjeKss86KCd4tW7Zw7LHHcuqpp7J69Wpee+01vvzyS6644ooO2RpX9D6Oy+XSAd3lcsX92J6GgF5R0qBrqhb3YwsEAsGe+P1+fd26dbrf729z/RFHHKHPmjUr9joSieh2u10/55xzYsvKy8t1QP/666/12267TR89erSuabuvX3/72990h8Ohq6qqNzY26iaTSf/3v/8dW19bW6tbrVb96quv1nVd17dt26YriqLv2rWrhS1HH320fuutt+q6ruvPP/+8np6e3u77Kikp0QH9hx9+0HVd1xcvXqwD+ieffBLb5r333tOB2Hu/88479UmTJsXWn3feefpJJ53U7jn2/Iyuvvpq/YknntDT09P1JUuWtFh/yimn6Jdffrmu67p+zTXX6DfeeKOemZmpr1+/Xg+FQrrNZtM/+uijdo9/+eWX66eeemoLu/Lz8/VgMBhb9uSTT+rZ2dktfo7PPPNMi89g3rx5+gUXXLDf99OR9/XPf/6z1c85GAzqVqtV//DDD9u1s/nn8uyzz8aW/fjjjzqgr1+/Xtd1Xb/ooov0X//61y1s+eKLL3RZlmPvb8iQIfqf//zndm3f1+91Z+7fomleNwkHVEJBFbM1uT9KNawR9EcI+sJY7EasTlOiTRIIBHFm4sSJse8VRSE7O5sJEybEluXn5wPRJmXr169nxowZLUpjZ86cicfjYefOndTX1xMKhZg+fXpsfVZWVixUArBmzRpUVWXUqFEt7AgGg93OadnzvTR3h62qqmLw4MHdOu5///tfqqqqWLp0KQcffHCLdUcccQR///vfgagX5v7772fjxo0sWbKEuro6wuEwM2fOjG3/t7/9jeeee47t27fj9/sJhUKtqqwmTJiAybT7erthwwYmTpyIxWKJLZs2bVqLfX77299y6qmnxrxp8+fP36+3qb33tWrVKjZv3ozT6WyxfSAQYMuWLe3a2Ux7P4cxY8awatUqVq9eHQtnQbSzr6ZplJSUMHbs2H3aHE+S+w6cAoSDKiF/JOnEjK7rMdv8njABT5hwUEVTdaxOI/kWBYMxPgO+BAJBcmA0Glu8liSpxbJm4aJpWlzO5/F4UBSF77//vlX3VofD0a1j95TdBx54ICtWrOC5555j6tSpLcRcc4nzpk2bWLduHbNmzeKnn35iyZIl1NfXx3J4AF599VVuuOEGHn30UWbMmIHT6eThhx/m22+/bXE+u93eaRvnzp3Ltm3beP/99/n44485+uijufzyy3nkkUc6/b48Hg8HHXRQC8HRTG5u7n7t3NfPwePxcOmll3LVVVe12q+7orOzJNcdOAXRVB1fYxBnlmX/G/e0LZpOyB8h5I/gdYUI+SNEwhqyAkazgj0jqro99UFcNQGyB3b+j0wgEPQNxo4dy+uvv46u67Gb1NKlS3E6nQwaNIisrCyMRiPffvtt7MZUX1/Pxo0bOeKII4DoDVRVVa
qqqjjssMMS9l5MJhOq2rH8xeHDh/Poo48ye/ZsFEXhiSeeiK2bMGECmZmZ3HfffUyePBmHw8Hs2bN58MEHqa+vj+XLQPSzOvTQQ7nssstiy/b0dLTH6NGjeemllwgGg5jNZgC+++67Vtvl5uZy3nnncd5553HYYYdx44037lPMtPe+pkyZwmuvvUZeXh5paWn7ta8zTJkyhXXr1jFixIi4HrcriATgOBDwRIiEEpMIHAmr+Nwh6su9lG9uoHxzAzU7PYT8YYxmGWeWGXu6GZPFEMv0t9iNNNb48TemXvmdQCCID5dddhk7duzgyiuv5KeffmLhwoXceeedXHfddciyjMPh4KKLLuLGG2/k008/Ze3atZx//vnI8u7bxqhRozj77LM599xzeeONNygpKWHZsmU88MADvPfee22ed9myZYwZM4Zdu3bF7b0MHTqU1atXs2HDBmpqamKt8Y8++ugWYmVPuxcvXszrr7/eotmcJEkcfvjhvPzyyzHhMnHiRILBIIsWLYqJOICRI0eyfPlyPvzwQzZu3Mjtt9/epijZm1/+8pdomsavf/1r1q9fz4cffhgTKc2i8o477mDhwoVs3ryZH3/8kXfffbdFyKYz7+vss88mJyeHk046iS+++IKSkhKWLFnCVVddxc6dO/dr7764+eab+eqrr7jiiitYuXIlmzZtYuHChQlJABZipptIEqghlaA/sv+N44Cu64QCETz1Aaq3uynb5KJii4v6Sh+aqmF1GnFkmrE6TRhMbYeRjGYFXddpqPKjqvFxNwsEgtSisLCQ999/n2XLljFp0iR+85vfcNFFF/H73/8+ts3DDz/MYYcdxrx58zjmmGOYNWsWBx10UIvjPP/885x77rlcf/31jB49mvnz5/Pdd9+1G2bw+Xxs2LCh27N49uSSSy5h9OjRTJ06ldzcXJYuXQpEPSU1NTVt7jN69Gg+/fRT/vWvf3H99dfHlh9xxBGoqhoTM7Isc/jhhyNJUot8mUsvvZRTTjmFX/ziF0yfPp3a2toWXpr2SEtL45133mHlypVMnjyZ3/3ud9xxxx0AsTwak8nErbfeysSJEzn88MNRFIVXX301dozOvC+bzcbnn3/O4MGDOeWUUxg7diwXXXQRgUCg256aiRMn8tlnn7Fx40YOO+wwDjzwQO644w4KCgq6ddyuIOm6rvf6WXsRt9tNeno6Lpcr7i42rytIZYkLSZJIy7WSXdC9GHF7aKpGKKAS9EXwuYKEAmqL8JHRrHR6voWm6XgbQmQX2snIE4PrBIJkJxAIUFJSQnFxcYvkUUHq8/LLL3PBBRfgcrlik6T7C/v6ve7M/VvkzMQBg0nB7w6h5WvISnycXZFwVLwEfBH8riDhoIqu6ygGGaNFweo07v8g+0CWJcxWBVe1H6vDiNnWveMJBAKBoGO8+OKLDBs2jMLCQlatWsXNN9/MGWec0e+ETDxJaJjp888/Z968eRQUFCBJEm+99VarbdavX8+JJ55Ieno6drudgw8+mO3bt/e+sfvAaJabKoe6njej69Hk3ca6ANXbouGjyhI3rkofuq43hY8s0fBRnKqQTFYDalijocqHpvVpB51AIBAkDRUVFfzqV79i7NixXHvttZx++umxknBB10ioZ8br9TJp0iQuvPBCTjnllFbrt2zZwqxZs7jooou4++67SUtL48cff0w6F6usyOi6TsAXxuLouIdDUzVCfpWAL4zfHWoKH6koioTRrGDOMMVtPHp72NKMeOqDWJ0B0rLFU4FAIBD0NDfddBM33XRTos3oUyRUzMydO5e5c+e2u/53v/sdxx13HA899FBs2fDhw3vDtE6jGGX87hDpudZ9CpBIU7Jwq/CRMT7ho84iKzImi4Kryo/FbsRkEZFHgUAgEKQWSVvNpGka7733HqNGjWLOnDnk5eUxffr0NkNRexIMBnG73S2+egOjWSEUUFvNatozfFS1zU3Z5mj4yF21V/jIEb/wUWcx24yEAhFc1X
50EW4SCAQCQYqRtGKmqqoKj8fDH//4R4499lg++ugjTj75ZE455ZQWg7/25oEHHiA9PT32VVRU1Cv2GowKkbBGyB9BVTUCnjANVT7Kt7go39JA1TY3PlcQxSBhzzBhzzBjthnjljDcXWxpJhrrAvjcoveMQCAQCFKLpI0pNLdLPumkk7j22msBmDx5Ml999RVPPfVUi+ZFe3Lrrbdy3XXXxV673e5eEzSyAo11ARoqfdHRAZqO0aRgshiwOpNDtLSHYpBRFIn6Sh9mm6HdHjUCgUAgECQbSStmcnJyMBgMHHDAAS2Wjx07li+//LLd/cxmc6xFdG9jsRkJeMIYTArWNBOy3LPJu/HG4jA2jTrwkzXQ3uPJxwKBQCAQxIOkdReYTCYOPvhgNmzY0GL5xo0bGTJkSIKs2jeKUW4KHxlSTshAtJW21WHEXePH3xi/7pwCgUAgEPQkCRUzHo+HlStXsnLlSgBKSkpYuXJlrI/MjTfeyGuvvcYzzzzD5s2beeKJJ3jnnXc61DJa0DUMJgUJaKjyoUbEqAOBQJA8NE+1hug8psceeyyh9giSh4SGmZYvX86RRx4Ze92c63LeeeexYMECTj75ZJ566ikeeOABrrrqKkaPHs3rr7/OrFmzEmVyv8DqNOFpCNJY4ydjgJisLRAIko/vvvsOu11cnwRREipmZs+ezf5GQ1144YVceOGFvWSRAECSJcw2A66aABanCYtdjDoQCATJRW5ubqJNACAcDmM0imtkoknanBlBYjFZDGiqhqsqOo1bIBAIkom9w0ySJPHss89y8sknY7PZGDlyJG+//XaLfdauXcvcuXNxOBzk5+dzzjnntJg+/cEHHzBr1iwyMjLIzs7mhBNOYMuWLbH1paWlSJLEa6+9xhFHHIHFYuHll1/u8fcq2D9CzAjaxeo04m0I4qkPJtoUgUAg2C933303Z5xxBqtXr+a4447j7LPPpq6uDoCGhgaOOuooDjzwQJYvX84HH3xAZWUlZ5xxRmx/r9fLddddx/Lly1m0aBGyLHPyySfHWoU0c8stt3D11Vezfv165syZ06vvUdA2SVuaLUg8shIdsdDQPOrAKn5dBIK+iK7r+MNdH5TbHaxGJW5tIM4//3zOOussAO6//34ef/xxli1bxrHHHssTTzzBgQceyP333x/b/rnnnqOoqIiNGzcyatQoTj311BbHe+6558jNzWXdunWMHz8+tvyaa65pc56gIHGIu5Ngn5htRjx1QVzVPnIGOZFSsORcIBDsG39Y5YA7PkzIudfdMwebKT63ookTJ8a+t9vtpKWlUVVVBcCqVatYvHgxDoej1X5btmxh1KhRbNq0iTvuuINvv/2WmpqamEdm+/btLcTM1KlT42KvIH4IMSPYL9Y0I421QaxOE47M5JpYLhAIBM3snYgrSVJMkHg8HubNm8eDDz7Yar+BAwcCMG/ePIYMGcIzzzxDQUEBmqYxfvx4QqGWY15EFVXyIcSMYL8oBhmDSaah0ofZbsQoRh0IBH0Kq1Fh3T2Jyf2w9tKA3SlTpvD6668zdOhQDIbWt77a2lo2bNjAM888w2GHHQawz27zguRCiBlBhzDbDXjqg7irfWQVOMSoA4GgDyFJUtxCPcnK5ZdfzjPPPMNZZ53FTTfdRFZWFps3b+bVV1/l2WefJTMzk+zsbP7+978zcOBAtm/fzi233JJoswUdRFQzCTqEJElYnUbcNWKytkAgSD0KCgpYunQpqqry85//nAkTJnDNNdeQkZGBLMvIssyrr77K999/z/jx47n22mt5+OGHE222oINI+v661qU4breb9PR0XC4XaWlpcT221xWkssTVr/JIfO4QJouB/KFpKEahhQWC3iIQCFBSUkJxcTEWS/+55gj6Nvv6ve7M/VvcjQSdwuo04m8M4a7xJ9oUgUAgEAgAIWYEnUSSJCwOI+5aPwGPmKwtEAgEgsQjxIyg0xjNCroG9VU+VDHqQCAQCAQJRogZQZewOI
34GoI01gYSbYpAIBAI+jlCzAi6hNw8WbvaT9AfSbQ5AoFAIOjHCDEj6DImqwE13DRZW+vTRXECgUAgSGKEmBF0C1uaEU99EG+DmKwtEAgEgsQgxIygW8hK06iDKh+hgAg3CQQCgaD3EWJG0G3MNgMhfwRXtZ8+3oNRIBAIBEmIEDOCbiNJEjanicY6MepA0LuoqoanPkg4qCbaFIFAkECEmBHEBcUooygSDZU+ImFxYxH0PLqm01Dpo7LURfkWF7VlHgLesPAO9mFmz56NJElIksTKlStbrV+wYAEZGRm9bldvIEkSb731Vpf3Hzp0aOyza2hoiJtdyYIQM4K4YXEY8XvCuGpE7xlBz+Oq8eOq8mN1GJEVcFX6qdjqomqbG68riCYaOvZJLrnkEsrLyxk/fjylpaVIktSr5x86dCiPPfZYr56zK8yePZsFCxbEXn/33Xe8/vrriTOoh+nbM98FvYokSVgdRhpr/NgcRqxOU6JNEvRRPPUB6su9mKwKBpMCgMkSbRXgc4fwNoSw2I04sszYnKbYNoLUx2azMWDAgESbkXLk5uaSlZWVaDN6DOGZEcQVo1lB16PufzUinowF8cffGKK2zItilDFZWj6PKUYZe7oZW7qJcChC9fZGyre4qC/3EvSJEFR/4a233mLkyJFYLBbmzJnDjh07WqxfuHAhU6ZMwWKxMGzYMO6++24ikWg1pq7r3HXXXQwePBiz2UxBQQFXXXUVEPV2bNu2jWuvvTYWstkfzaGv/dn05JNPMnz4cEwmE6NHj+af//xnu8c86qijuOKKK1osq66uxmQysWjRog59Rn0NIWYEccfmNOFrDNFYJ8JNgvgS8keoLfOiazoWu7Hd7WRZwuow4cg0I0lQV+GlYquLmh2N+Nwh0eRxb3QdQt7EfMVZYPp8Pv7whz/w4osvsnTpUhoaGjjzzDNj67/44gvOPfdcrr76atatW8fTTz/NggUL+MMf/gDA66+/zp///GeefvppNm3axFtvvcWECRMAeOONNxg0aBD33HMP5eXllJeXx8WmN998k6uvvprrr7+etWvXcumll3LBBRewePHiNo938cUX88orrxAM7u7v9dJLL1FYWMhRRx3V6c+sLyDCTIK4I8kSZqsBV5Ufi924z5uOQNBRIiGV2l0eQv4I9oyOhTAlScJkNWCyGoiEVDwNIRrrg1jsRpxZFqxOIwajCEER9sH9BYk5921lYLJ3adehQ4e28raFw2GeeOIJpk+fDsALL7zA2LFjWbZsGdOmTePuu+/mlltu4bzzzgNg2LBh3Hvvvdx0003ceeedbN++nQEDBnDMMcdgNBoZPHgw06ZNAyArKwtFUXA6nZ0Kde3PpkceeYTzzz+fyy67DIDrrruOb775hkceeYQjjzyy1fFOOeUUrrjiChYuXMgZZ5wBRD1A559/fsxbtGTJkk58kqmP8MwIegST1YCmariqxagDQfdRVY26ci9+Twh7hqlLSZ8Gk4I93YTNaSQciFC1zU3FFhcNlV5CYr5Yn8FgMHDwwQfHXo8ZM4aMjAzWr18PwKpVq7jnnntwOByxr+akYp/Px+mnn47f72fYsGFccsklvPnmm7EQVE/ZtH79embOnNlin5kzZ8bW743FYuGcc87hueeeA2DFihWsXbuW888/v1t2pjLCMyPoMaxOI976IFZngLRsa6LNEaQouqbTUOGlsS6APb1rQmZPZEXG6jSh6zohv0ptmRdXdQBbuglHhhmL3Ygk926FTMIx2qIekkSduxfxeDzcfffdnHLKKa3WWSwWioqK2LBhA5988gkff/wxl112GQ8//DCfffYZRmPyeJkvvvhiJk+ezM6dO3n++ec56qijGDJkSKLNShhCzAh6DFmRMVqUWLhp72RNgWB/6LqOq8ZPQ5Ufm9OErMTPmSxJ0cnvZltTCKougKcugMVhwpllxuo0oRj6ifNakroc6kk2IpEIy5cvj4WGNmzYQENDA2PHjgVgypQpbN
iwgREjRrR7DKvVyrx585g3bx6XX345Y8aMYc2aNUyZMgWTyYSqdq6X1v5sGjt2LEuXLo2FvgCWLl3KAQcc0O4xJ0yYwNSpU3nmmWd45ZVXeOKJJzplU19D3F0EPYrZZqSxPoCrykfOIGf/e+IVdAtvQ5D6ci8WmwHF2HPCwmCKlnhrqkbAG8bvDmKyGnBkWbClmYQQTyGMRiNXXnkljz/+OAaDgSuuuIJDDjkkJiTuuOMOTjjhBAYPHsxpp52GLMusWrWKtWvXct9997FgwQJUVWX69OnYbDZeeuklrFZrzOsxdOhQPv/8c84880zMZjM5OTndtunGG2/kjDPO4MADD+SYY47hnXfe4Y033uCTTz7Z53EvvvhirrjiCux2OyeffHI3P7nUpp88dggSSXTUQRCvS4w6EHQcf2OI2l1eDCYZYy+JCVmRsaWZsGea0XWd2l2eaHfhXY0EPGF0kf+V9NhsNm6++WZ++ctfMnPmTBwOB6+99lps/Zw5c3j33Xf56KOPOPjggznkkEP485//HBMrGRkZPPPMM8ycOZOJEyfyySef8M4775CdnQ3APffcQ2lpKcOHDyc3NzcuNs2fP5+//OUvPPLII4wbN46nn36a559/ntmzZ+/zuGeddRYGg4GzzjoLi8XSyU+qbyHpfbzxgtvtJj09HZfLRVpaWlyP7XUFqSxx4cjs379EHcHvCaEYFAYUp4kGZoL9EvRHqN7mJhLWsKUltvliOKgS9EWQ5Kgwd2RasDiNKHEMeXWEQCBASUkJxcXF/fbGNXv2bCZPnpwSHXibWbBgAddcc02PjBBoFlXfffcdU6ZM2e/2S5Ys4cgjj6S+vj5pxj7s6/e6M/fvhHpmPv/8c+bNm0dBQcF+50785je/QZKklPolFuzGYjcS9InJ2oL9E24qwQ4HVazOxCdcGs0KjsxoYrC/MURlabQKyl3jFwMuE8D//d//4XA4WLNmTaJNSRjhcJiKigp+//vfc8ghh3RIyIwbN465c+f2gnWJIaGBYK/Xy6RJk7jwwgvbzCxv5s033+Sbb76hoCBBfRAE3SY66sCAu8aP1WlK+NO2IDlRIxp1ZR78jaGmhnfJk2OlGGRs6WZ0TY96jnY0YjQbsGeYsKebMdsMSWVvX+Tll1/G7/cDMHjw4ARbE2Xu3Ll88cUXba677bbbeuS+tXTpUo488khGjRrFf//73w7t8/777xMOhwHiHqVIBhIqZubOnbtfpbhr1y6uvPJKPvzwQ44//vheskzQExhMCqGASkOVD7O1ZxM6BamHpuk0VHrx1AW73EumN5BkCYvdiNlmIBxUcVX6aawNYHUacWRamgZfit/tnqCwsDDRJrTi2WefjQmsvcnKyiIrKyvu/V9mz57daQ93Xy/bTuoUfU3TOOecc7jxxhsZN25ch/YJBoMtWjy73e6eMk/QBawOI576IO5aP5kD+kYpqKD76LqOu9qHq8qPLS2+Jdg9hSRJmCyGlgMuXSEsNjHgsj+RjAKrP5LUV4wHH3wQg8EQG/LVER544AHS09NjX0VFRT1ooaCzRJ9qDbhrAgS84USbI0gSPPVB6it9mO3GlPTYxQZcppmIhFQx4FIg6GWS9qrx/fff85e//IUFCxZ0yt1866234nK5Yl97TyYVJB6jxRANKVT50FQxWbu/43OHqCvzYjDKGM2p7cmQZQmLwygGXAoEvUzSipkvvviCqqoqBg8ejMFgwGAwsG3bNq6//nqGDh3a7n5ms5m0tLQWX4Lkw+ow4G0Iisna/ZygL0xdmQfQMdsSX7kUL5oHXDqzLBjNCp6GEBUlLiq2umisCxAJiyoogSCeJG3OzDnnnMMxxxzTYtmcOXM455xzuOCCCxJklSBeyIqMyWLAVR3A6jBhsibtr6KghwiHonORwkEVe4Y50eb0GHt2Fw76IlSVujHZDDgzzNjSzeJ3XyCIAwn9K/J4PGzevDn2uqSkhJUrV5KVlcXgwYNjHRebMRqNDBgwgN
GjR/e2qYIewGwz4KkL0lDlI7dIjDroT+xdgt0f2HvAZV2FF1dNPx9wGQckSeLNN99k/vz57W7z008/cf7557Ny5UrGjBnDypUre80+Qe+QUDGzfPlyjjzyyNjr6667DoDzzjuPBQsWJMgqQW9iTYtWN9nSTKKTcj9B03TqK5K/BLunEAMue58777wTu93Ohg0bcDgciTZH0AMkVMx0tla+tLS054wRJATFIGMwyTRU+jDbjCmfACrYN7qu46ry4a72Y0tPjRLsnkQMuOweoVDH5r1t2bKF448/vs/3WunP9O8riSApMNsMhAIRXNU+UcLax2lRgi28DzHEgMuOMXv2bK644gquueYacnJymDNnDgDl5eXMnTsXq9XKsGHDWnTFlSSJ77//nnvuuQdJkrjrrrsSZL2gJxFXE0HCkaRoOWtjXQCfW0zW7qtES7A9GE2pX4LdU0RDUEacWRYMRhlXdYDyrQ1UbXPjc4eE2AdeeOEFTCYTS5cu5amnngLg9ttv59RTT2XVqlWcffbZnHnmmaxfvx6ICp1x48Zx/fXXU15ezg033JBI8wU9hPBhCpICg1EhJKlN4SYDBqO42fUlAt4wtWUegD5Vgt2TGM0KRrOCGtHwe8K4G0Koio4a1tBMOnIck4V1Xccfabslf09jNVg7lTc1cuRIHnrooRbLTj/9dC6++GIA7r33Xj7++GP++te/8n//938MGDAAg8GAw+FgwIABcbVdkDwIMSNIGqzOaDJwY02AzIFi1EFfIRxUqSvzEunjJdg9hWKIhqBCYZ2ATycSUQkHVRRFQlYkJFnqdhK1P+Jn+ivT42Rx5/j2l99iM9o6vP1BBx3UatmMGTNavRYVS/0LEWYSJA2SFB3g56rx4/eIcFNfIFaC7Q1hSxeT0ruDJEVFi6xISFL0sw0HVSIhFTWi9ZsQlN0uHnQErRGeGUFSYTQrhPwRGir9mKwGlH5e7ZLKaJpOXbkXT0MQe4a535Vg9xxRUSMpErquo6k6mqoiyRKKQUaWpU73q7EarHz7y297yN79n7u7fPPNN5x77rktXh944IHdPq4gdRBiRpB0WNNMeOuDNNYaycjruPtZkDzouo6r0oe7pmkKtmgG1yM0ixrQ0bRoV2W5yXvTmRCUJEmdCvUkG//5z3+YOnUqs2bN4uWXX2bZsmX84x//SLRZgl5EiBlB0iHL0aZirmo/VodRJIymII11AeqrfFhECXYvISE3fcy6BpGIhqSCLMvIBinqrenDnrG7776bV199lcsuu4yBAwfyr3/9iwMOOCDRZgl6ESFmBEmJyWrAU9806mBwmniyTyG8riD15V6MJkWUYPc6EpIMCs0hKA1NpVshqGRiyZIlrZY15wpddtll7e4nkoH7PuKRSZC02JpGHXgbgok2RdBBAt4wdWVeINoMUZA4osnCMnJTbk04GK2CioRVNNGET9DHEGJGkLTISrS5WkOVj1AgkmhzBPshHFSp3eUhEtawOkXlUvIgIcsyiiHqkVHD0SqocEhFU/tPFZSgbyPEjCCpMdsMhPwRXNV+cdFNYtRwtAQ74AtjSxM5TslJNMQkKzKSBFpTaXc42L9KuwV9EyFmBEmNJEnYnKboqAOX6D2TjGiaTl1FUwl2uijBTgVahKA0nUhIJRwQIShB6iLEjCDpUYwyiiLRUOUjElITbY5gD0QJdqqzW9QAREQISpCiCDEjSAksDiMBbxhXjQg3JRPuWj/1lT6sDlGCndpEQ1DKXiGo/tZdWJC6iKuPICWQJAmrw0hjbQB/YzjR5ghoLsH2YbIoGEyiBLuv0ByCkmQJTW0KQYkqKEGSI8SMIGUwmBTQdRqqfKgRLdHm9GsC3jC1u7xIcrQnkKDv0bK0e3cIKiJCUIIkRIgZQUphdZrwN4ZorPUn2pR+SygQoXaXBzWsYnWIEuy+T7SDsNLPB1wKkhshZgQphRQbdRAg4BXhpt5GDWvUlXsJeiNiCna/Q2o3BKVGNHQRghIkEC
FmBCmHyWJAUzVcVT40VYSbegtN1agr9+BtCGLPMIkS7H7M3iGocHNeTQ+HoGbPnh0drilJbY4oWLBgARkZGT1y7q6yt0133XUXkydPTpg9+6P58022z3F/CDEjSEmsTiPehiCeejHqoDfQdZ36Kj/umgC2NFNKz/cRxJPdISiIDrgMh1QiIQ21h0TNJZdcQnl5OePHj6e0tLTXRfXQoUN57LHHurz/DTfcwKJFi+JnUDcZOnRoi5lX5eXl3Xp/iUJk7glSElmRMVoUGqr8WBxGTBbxq9yTuGv8uCp9WJ2iBFvQFr034NJmszFgwIC4HCsROBwOHA5Hos0gFAphMrUOFQ8YMID09PQEWNQ9xFVJkLKYbUbCwQiuKp+I1/cg3oYg9RWiBFvQMZJhwOVbb73FyJEjsVgszJkzhx07drRYv3DhQqZMmYLFYmHYsGHcfffdRCLR+W+6rnPXXXcxePBgzGYzBQUFXHXVVUA0zLVt2zauvfbaWDims+wdZjr//POZP38+jzzyCAMHDiQ7O5vLL7+ccHh3TmAwGOSGG26gsLAQu93O9OnTW3hTamtrOeussygsLMRmszFhwgT+9a9/tTjv7NmzueKKK7jmmmvIyclhzpw5nbY9mRGPs4KUxpZmorEuiNVpxpFpTrQ5fY6AJ0xtmRdZlkQJdh9G13V0f89UCEroaDqoug7SHpVRclQMSFZrXENFPp+PP/zhD7z44ouYTCYuu+wyzjzzTJYuXQrAF198wbnnnsvjjz/OYYcdxpYtW/j1r38NwJ133snrr7/On//8Z1599VXGjRtHRUUFq1atAuCNN95g0qRJ/PrXv+aSSy6Jm82LFy9m4MCBLF68mM2bN/OLX/yCyZMnx85xxRVXsG7dOl599VUKCgp48803OfbYY1mzZg0jR44kEAhw0EEHcfPNN5OWlsZ7773HOeecw/Dhw5k2bVrsPC+88AK//e1vY59FX0JcnQQpjWKQMRhlGqp8mO0GjMJzEDdCgQi1ZR7UiIo9XQjFvozu97N1xrT9b9gDjFq+HMVh79K+Q4cObZWXEw6HeeKJJ5g+fToQvYGPHTuWZcuWMW3aNO6++25uueUWzjvvPACGDRvGvffey0033cSdd97J9u3bGTBgAMcccwxGo5HBgwfHBEFWVhaKouB0OuMa6srMzOSJJ55AURTGjBnD8ccfz6JFi7jkkkvYvn07zz//PNu3b6egoACI5t188MEHPP/889x///0UFhZyww03xI535ZVX8uGHH/Lvf/+7hZgZOXIkDz30UItzl5aWxu19JBIRZhKkPGa7gaAvjLvaJ3pexIlIWKV2l5egL4ItTZRgC3qOcCi+ISiDwcDBBx8cez1mzBgyMjJYv349AKtWreKee+6J5a44HI5YUrHP5+P000/H7/czbNgwLrnkEt58881YCKqnGDduHIqy+0Fs4MCBVFVVAbBmzRpUVWXUqFEtbP7ss8/YsmULAKqqcu+99zJhwgSysrJwOBx8+OGHbN++vcV5DjrooB59H4lEeGYEKU/zqAN3TQCr0yxuvt1EUzXqy7343EEcGWIKdn9AsloZ9vWyBJxZB7OVSFhDiujISssQVE/g8Xi4++67OeWUU1qts1gsFBUVsWHDBj755BM+/vhjLrvsMh5++GE+++wzjEZjj9i093ElSULTtJi9iqLw/ffftxA8QCyR+OGHH+Yvf/kLjz32GBMmTMBut3PNNdcQCoVabG+3d80DlgoIMSPoExhMCqGASkOlD7PVgGIUTseuoGs69ZU+3LVB7OmiBLu/IEkSks2WUBt0XUeLaGgRkJXdScSdFTWRSITly5fHwisbNmygoaGBsWPHAjBlyhQ2bNjAiBEj2j2G1Wpl3rx5zJs3j8svv5wxY8awZs0apkyZgslkQlXVrr/RTnLggQeiqipVVVUcdthhbW6zdOlSTjrpJH71q18BoGkaGzdu5IADDug1OxONEDOCPoPVacRTF8Rd6ydzQN99Au
lJXDV+XFV+rA4DsiIEoaD3kCQJSWku7dZRVRVZlnYLmw4Ka6PRyJVXXsnjjz+OwWDgiiuu4JBDDomJmzvuuIMTTjiBwYMHc9pppyHLMqtWrWLt2rXcd999LFiwAFVVmT59OjabjZdeegmr1cqQIUOAaJ7O559/zplnnonZbCYnJ6fHPhOAUaNGcfbZZ3Puuefy6KOPcuCBB1JdXc2iRYuYOHEixx9/PCNHjuS///0vX331FZmZmfzpT3+isrKyX4kZcbUS9BkkScJiN+Cu8RPwiFEHncVTH6S+wovJKkqwBYmjubRbaWPAJTr7zYuz2WzcfPPN/PKXv2TmzJk4HA5ee+212Po5c+bw7rvv8tFHH3HwwQdzyCGH8Oc//zkmVjIyMnjmmWeYOXMmEydO5JNPPuGdd94hOzsbgHvuuYfS0lKGDx9Obm5uz30Qe/D8889z7rnncv311zN69Gjmz5/Pd999x+DBgwH4/e9/z5QpU5gzZw6zZ89mwIABzJ8/v1dsSxYkvY9nTLrdbtLT03G5XKSlpcX12F5XkMoSF45MS1yPK+geXlcIq9NI3mCn8C50EL8nRNW2RiQJLPaeyQsQdI9wJES9r5IhQ4ZiMfena46OrkdDoD879hgmT5rEn//8WJdCUIKOsWDBAq655hoaGhp6/FyBQICSkhKKi4uxWFr+Xnfm/p3QK/3nn3/OvHnzKCgoQJIk3nrrrdi6cDjMzTffHEtmKigo4Nxzz6WsrCxxBgtSguZRB411gUSbkhKE/BFqd3nRVU0IGUESsseAS+Cpp58iPSONH75fiRoWAy7jjcPh4De/+U2izeg0CRUzXq+XSZMm8be//a3VOp/Px4oVK7j99ttZsWIFb7zxBhs2bODEE09MgKWCVEKWJUwWAw1VfoL+ni2pTHUiIZXaMi8hfwSrqAITJDkvLPgnq1au4btlyxk5cjThsEqoFwZc7ou5c+e2KJne8+v+++/vdXu6y8qVK1m7di0//PBDok3pFAlNAJ47dy5z585tc116ejoff/xxi2VPPPEE06ZNY/v27bFYoUDQFmabAU99EFeVj5wiZ4eTB/sTqqpRJ0qwBSlEYWHhXkt0dC064FJSQZZlZEO0y3Bv/T4/++yz+NvpnpyVldUrNsSTfVV5JTMpVc3kcrn2O5o8GAwSDO6epOx2u3vBMkEyYnUa8dQHsTpNOLP6U47B/tE1nYZKH411gZQuwfYEIrj8YUwGGZNBxmyQMRsUhC7rL/TegMv2aC2wBIkgZcRMIBDg5ptv5qyzztpnItADDzzA3Xff3YuWCZIVxSBjMMm4qnxY7EaMZlGh00ysBNtpTMkk6WBYo6oxQJU7SEhVQQIJCaMiY5JlrBYFh8mA2SBjbBI4RkUonL5Mc2k36GhadMClLEtRT00nSrsFqUlKiJlwOMwZZ5yBrus8+eST+9z21ltv5brrrou9drvdFBUV9bSJgiTFbDPgbQjSUO0jp9AhQimApz5AfbkXs1XBYEwtgadqOrXeIOUNAXyhCE6LkXRbNGlZ1yGsaoRVjXpPmGot6qFVZCnmtXGYDViMCmaDgskgCS9OnyQaZkKOhqDUsIbaS92FBYkj6cVMs5DZtm0bn3766X7Ls8xmM2azGIoniCJJEhaHkcbaADanqd8PTPQ3hqgt86IYZYyWpP/zj6Hr0OAPU+Hy0+ALYzEq5DgsLYSIJBELN9n3+DFHVJ2wqhEIabj9ATR0JMCkyBgVGZtZwW4yYDY2h6oUDOIpvg8QDUFJzSGopu7CvRmCEvQeSX01axYymzZtYvHixbGmRQJBZzAYFRSladSBzZBy3oh4EfJHqC3zomt6SlUueUMRqlwBqj0hJCDbbkbuRGTMoEgY9ppp0+zFCakadZ4QVXoAkDDI0VCVxShjNxuwGhXMihITSeKBPjXZOwQVCanRcm8RguozJFTMeDweNm/eHHtdUlLCypUrycrKYuDAgZx22mmsWLGCd999F1VVqaioAKIZ4i
ZT6lyMBYnH4ogmA7tqAmQP7H+jDiIhldpdHkL+CPaM1PjbCUU0qhuDVLoDBCMaGTYjxjjl9+zpxaENL44vqNHg86MDMhJGQ1TkOEwKNrMBk0HB3BSmUsSNMIVoCkE1V0H14oBLQc+SUDGzfPlyjjzyyNjr5lyX8847j7vuuou3334bgMmTJ7fYb/HixcyePbu3zBT0AaKjDow01vixOYxYnalxQ48HzSXYfk8IewqUYGs61DXlxXiCERwmA2nO3mnm1+zFsUZfRe3RmnJxNI0aT4iIOwjoGBQZkyJjNRqwmxXMxmiIymyIhq+S/GPul7z44gtcf+N1VFfWtqyCisOAS0FiSaiYmT179j6bHPXxSQuCXsZoVggFIjRU+THZDCgpWMXTWXRNp6HCu7sEO4kv0LoOjYEIFS4/td4gZoOBHIc54aJAlsEsy5hpw4sT0WgMhKnzRZONZUnCoEiYlGiYymZSMBujAsekyMKLk4TsPeBSU1WkLgy4FCSWpM6ZEQjijdVpio46qDWSkWdLtDk9iq7ruGr8NFT5sTlNSV2C7Q+rVLoCVHuC6Dpk2cwoSV5KvduLszsfR9MgpKqEVZ2axiARTQcJjHJU4FhNBux7ChzhxUkaWubV7A5BKUpU2IgQVHKTvFc3gaAHkGUJs9WAq9pP0Ne3J2t76oPUl3ux2AwoxuT8Uw+rOuWuAD9VNFLu8mM3Gciym5JeyLSHLIPFqOC0GMi0m8h1msmxm7Gbo2Eytz/Mtnofm6rcrCt382OZm/UVbrbVeqlqDODyh/GHVcS4obY55mdHcfU1V3H1NVeRk5fFwMJ87rzrjpgXv76+ngsuPJ+8ATmkZzqZd+LxbNq8qc1jlZaWYrYa+f775S2WP/7Xxxk1ejiSpCNJoEZ2T+1WI4kZmSDYP8IzI+h3mKzRUQcNVT5yB6f1STeyzx2irsyLwZScJdi6DvW+EOUuP25/GJvJQK6zb3ZpliQwKhJGpeXPodmLEwxreAIRVE1DkiSMcjQkZTErOJq8OCaDjCJpPWajrutEQj13/H1hMMmd8nj886UXueD8C1n65dd8//33XHb5bxhcNJiLLrqYiy65kM2bN/PGf9/EmZbG7353KyedNI9VK9dgNLbMuxo6dChHH3U0L7z4AgcdNDW2/IUXX+Ccc85FlqMet7ZCUIoiIylSn7x2pCrJd5UTCHoBW5oRb30QqzNAWrY10ebElaA/Ql2ZB13XMduSL9G5MRChwh2g1hPEKMtk2y2dKrXuK8gyWGQFyx73WF2HiKYRimi4fGFqPNFkY0WSsRg1su3RpoAGTUNCQpKieTrdJRLSeP6mL7t9nK5wwUOzOtWde9CgIh55+FEkSWL0qNGs/XENf/nrXzj8iCN49913+Gzx58yYcSgQHUw5bMRQFr69kNNOPa31uS+4iCuuvIyHH3oEs9nMDz+sYO3aNbz+nzdabLd3CCocahY1IgSVLPTDS4hAALIiY7QouKr8hAJ9Z7J2uKkEOxxUsfZSBVBHCYY1dtT52FDhps4bJMNqIt1m7JdCpj2iXpxo8nCGzUiuw0yuw0K6xYhBlprEjk4wrBGIqATCKv6QSjCiElY1VE1D03V0+m4oZPq06S2EwyHTZ7B58ybWr1+PwWBg2rTpsXXZ2dmMGjWan35a3+axTjrxJBRF4a2FbwHw4j9fZPYRsxk6dGg7Z496YxSDhER0wGU0BKWhJmhqtyCK8MwI+i1mm5HG+gCuan901EGKu4zViEZdmQd/YwhHZvKUYDePIChrCOBvGkFg6aeNC7uKokgYUJAlCUWKJh9D1JOj6ToRrekFNM2pinpsZFlCbppbJUu0+zthMMlc8NCs3nkzbZw7UZhMJn519q948cUFnDz/ZF597V88+sifO7Bn4gdcCloixIygX2Nzmmisaxp1kJG6ow40Taeh0ounLog9IzlKsFuOIAhhMRpajSAQdA9JAmWvD1TXQScqctSI3mJbSYqKGlnaLW6al6fKIN
Zl3y1r8frbb79hxIiRjB07lkgkwrJl38bCTLW1tWzcuIGxYw9o93gXXHARB06ZxFNPP0kkEuHk+Sd3yp5WAy5DavTzFd2FexXh4BX0axSDjGKQqK/0EQmpiTanS+i6jrvah6vKjy0tOUqwvaEIJTUeNlU24glEyLZbcFoMQsj0AtE8muiAzWj5ePRLlqJehIimE4poBMIa/qYwVSCsElI1IpqGqulJHS7ZsWM7N950PRs2buDV117l/578G1defiUjR4xk3rwT+c1lv2Hp0i9ZtXoV519wLoUFhZw478R2jzd2zFimT5vObb+7lV+ccSZWa1dz6CRkWY5V4qnhaAgqHFJFCKoXEJ4ZQb/HYm8edeAna6A9KbwancFTH6S+0ofZbkx4CXYoolHVGKSqB0YQCLrHfr04atML2vLiNHtwoiGrRPKrs8/B7w8wc9YMFEXhisuv5OKLLwHg2b//g+uuv5b5p5xEKBTisFmHsXDhO60qmfbmgvMv5Otvvub8886Pg4ViwGUiEGJG0O+RJAmrw4i7xo/VYcKWQkMYYyXYRjmhYQJNh1pPkApXAE8wjMNsJM2aXAnIgtZITfk1e7rMmgWOrutEdGK5OM3bSpKEIu8OT8m0n4vTExiNRh595E888de/tVqXmZnJ888taHffc889j3PPPa/V8l1luxg/fgJTpx4cT1NFCKoXEWJGIAAMJoWQP0JDVXSytmJIfm9C0BemrswDJK4Ee+8RBBaDyItJddoSONAkcvS9cnH2EDjNoa14loz3NB6Ph9JtpTz51P9x95339OCZxIDLnkaIGYGgCavThKc+SGONn4wByT1ZOxxSqS3zEg6qCUtc9odVKlx+ajwhSJERBIKu0xxmgva9OBG1pRcnWk21W9w0C59k4eprruK1f7/KiSeexPnnX9ALZ5TEgMseQogZgaAJSZYw2w24agJYnCYs9uQMk+xdgt3bhFWdGk+QCneAQChCutWEKQU8WYL4s28vjo6q6UTaKhmXokKns16cTz7+NK72/+PZ5/jHs8/F9ZgdRQy4jC9CzAgEe2CyGPAGgriqfJgGO5OiMmhPNE2nviIxJdj9aQSBoHs059PsSYtkYx1o4cWRkOU2SsYTnGzcG4gBl/FBiBmBYC+sTiPehiBWp4m0nOQZdaDrOq4qH+5qP7b03i3BFiMIBN1l/14c2vbi7NH4L1VycbqG1PQ3FfXWqBENVYSgOowQMwLBXjSPOmio8mOxGzFZk+PPpEUJdi+FdYJhjarGAJXuAKquk2E1xbrPCgTxYL9enH2WjPdNL057ISgx4LJ9kuMqLRAkGWabEU9dEFe1j5xBzoT3hYiWYHswmpReKcGOaDp1zSMIghGcVjGCQNB77MuLo+1VMt6V8Q2pghhw2XGEmBEI2sGaZqSxLhpucmQmLjck4A1TW+YBwGzr2T/ZvUcQWE0Gcpyi1FqQHMRtfEPKeXGaQlAy0dLuiIYUiXqRZUNUxPV3USPEjEDQDopBxmCUaWgK7RhNve+ZCAdV6sq8REIa9vSe7SXjDUWocAWo9YSQkERejCAl6LYXJwGN/7qOGHDZHuJSJRDsA7PdQNAfwV3t6/XZKrESbG8IW1rPlYmHIho76/1sKG+kyh3EaTGQaTcKISNIOuYe+zNuvun6Dm0rNc+n2mNGlSJFPRiarhNWNYJhjUBYjc6oCqsEIyphNTqfStN1dDr2Nz9y1HAe/+tfuvPWOo0k7U4M1vVod+FwUCUSVtG0/jcHSnhmBIJ9IEkSVqcRd00Ai8OEPb13+rpomk5duRdPQxB7hrlHnhrFCAJBf6Mr4xv2bPyXnF6clt2F1bCG2tRdWFb6TwhKiBmBYD8YjAohWcVV5cdi6/lhjrqu46r04a5pmoIdZ7exroM7EKbCFaBOjCAQ9FNCoRAmk2m/4xvaavwnSRLKHo3/ksMP0r8HXApHskDQAaxOI/7GEO4af4+fq7EuQH2VD0sPlGD7wyqltR42VDTi8ofJsp
lxWg1CyAhSkg8+eJ/Cglxee+1f7Ny5g3PP+SWDCvMYXDSAX/ziVLZtK41te+mlF3Pmmafx8EN/ZOSIoUw5cDzbtpXidJhZuPAtjpv7c/JyM5hxyFS+/fabaE8bORqqWvbtVxx37NEMzMtg3NgR3HjDtTQ0emJhKh2dcESLhakimtZumMpkMfDcc//gtDNOJT3TyQHjxvDOu+/E1quqyq8vvYRRo0eQluFg3IQD+OsTj7c4xkUXX8ipp5/CHx98gEGDC8jNz+a+P9xLJBLhlltvYkBBHsNHFfPPl15oEYIq2VrK6aefQUZGBllZWZx00kmUlpbSFxBiRiDoAJIkYXEYcdf6CXjCPXYerytIfbk37iXYYVWn3BXgp/JGKlwBHGYDWXaTmKUkAKLewHAwkJCvruai/fvfr3LhBefy7D8WcMoppzH/pBNwOJ18+OGnfPTxYhx2ByfPn0coFIrt89mSxWzatJG3336f//znrdjye+6+g6uuvoalXy1jxIiRXHjBuUQiEQC2bt3CKSfP46T58/n6m+UseOElvv3mK26+8dpYLk5zdZSq6YQi0Vwcf1jFH1IJhFVCkajAUZtyWe77w72cdurpfL/8B449di7nnX8OdXV1AGiaRmFhIf965VVW/bCG3932e26/4/f857//afH+lyxZTHl5OYs+WczDDz7CPffezfyTTyQjI5Mvv/iKSy7+NZddcRllZWUoikQ4HOa44+dit9n59NMlfPH5FzgcDo499tgWn1GqIum9ndXYy7jdbtLT03G5XKSlpcX12F5XkMoSV0LLdgW9i88Vwmw3kjfUiRLnDrwBb5jq7Y1oqobVGZ/KpeYRBLsa/HgCYewmAzaziC6nIhIRrEY3RYMHYzbH95oTDgZ47re/jOsxO8qFT76CsYPvZ+6xP2PixIkMHz6Ce+65k1df/S+zDjucV199hYcefIDvV6yO5YeEQiEGFebxr1f/w9FH/4xLL72YTz7+iPU/bcZkiv59bdtWyvhxo3nib09x3nnRQZM/rV/PwQdPZvn3qxg9egyXX/4bFFnm8b/+X8yOr75aytxjj6Gyqh6LxcK4A0Zx2eVXcPnlV8W22TMXR4dY47/MdAs33ngrd9x5F7Ik4fd5ycnN5O233+XYnx/b5vu++pqrqKis4LV//RuIemY+//wzNvy0CbkpU3/8xHHk5eby6aIlQNTDk5OXxVNP/p1fnPELXn7lZR744/2sXrkmZktEDZObn8Obb77JnDlzOvQziDeBQICSkhKKi4uxWFr+HnTm/t3lq9oXX3zB008/zZYtW/jvf/9LYWEh//znPykuLmbWrFldPaxAkNRYnEa89UEaa41k5NnidtxQIELtLg+RcPxKsBsDTaXW3ugIApEXI+gLvPXWm1RXV/HxJ0s46KCpAKxZs5qtW7cwcEB2i20DgQAlW7fC0dHX48aNiwmZPRk/fkLs+/wBAwCorq5m9OgxrF2zmrVr1/Dvf78a20bXdTRNo7S0hDFjxrZpZ3u5OADjxo8nokWTchSTFWdaGrvKKgiEVWRZ4u9PP8k/X1zAjp078Pv9hEIhJk2a1OIYBxxwQEzIAOTn5TFu3PjYa0VRyM7Kprq6qukzWsWWLZvJzs1s9Rlt3LCJo486JqUHXHZJzLz++uucc845nH322fzwww8Eg0EAXC4X999/P++//35cjRQIkgVZljDbDLiq/VidJsxxGHWghjXqy70EfGEcGd2vlgqEVarcQaoaxQgCQccwmMxc+OQrCTt3Z5g4aRKrVq7kny8uYMqUg5AkCa/Hw4EHTuHZfyxotX1OTm7se5vd3uYxjcbdVXzNnh1d0wDweDxceOHF/Oa3l7far6hocKdsb8ZkMmLYQzTIUlPSrq7z71df47bbbube+x5k2vTpOJ1O/vqXP7F8+XeomtZkn47B2LLyUJKkFu+jeZkWex9epkyZwgsL/rmXNTrZWbkpP+CyS1fi++67j6eeeopzzz2XV1/drVZnzp
zJfffdFzfjBIJkxGQ14KmPTtbOKXJ260lGUzXqKrx441CCHdF0aj1Byl1iBIGgc0iS1OFQT6IZVjyM++9/kOPm/hxFUXj0T39h0uQDeeON/5Kbmxf3dILJkw/kp5/WM3z4iLged2+a++J8t+xrpk8/hN/85jexxn9bS7aiA4GwBlL0b13TdEKqFhvfsD8OnHwg//nvv8nbx2eUygMuuxT037BhA4cffnir5enp6TQ0NHTXJoEg6bGlGfHUB/E2BLt8jOgUbD/uGj/WbpRgN+fFbKpsZGtNdOxBjtMihIygzzJy5Cjee/9DFi58i5tvup5f/OIssrKzOfMXp7F06ZeUlpbwxeefceMN17Jr185uneva627g22+/4frrrmb16lVs3ryJd999m+uvuzpO76Ylw4eP4IcfVvDJJx+xefNG7r/vbn5Y8T0S7G7811QOHo7sbvyn6joRTWvR+E+HWEXVWWf9kuzsHE497WS+/PILSkpK+OyzJVx73TXs3Bn9jJob8UmyhKbqRJqqoNSwlvSN+LrkmRkwYACbN29m6NChLZZ/+eWXDBs2LB52CQRJjazIGEwyDVU+zDYDJkvn/5TctX7qK31YHV0vwfYEI1S6A9R4QshIZNvECAJB/2DUqNG8+94HHDf358iKwocfLuL223/H2b/8BR5PIwUFBRxxxJE4nd3z1IwfP4H/ffAJ99x9B3N+fhS6rlNcPIxTTj09Tu+kJRdedAmrVq/i/PN+hSRJnHbaGVx8yaV8/NGHQFMKjhT9vzl8rOvN/WWI5eJAVMhEIjqBsIrRbOGjjxZx++23ccaZp9PY2EhhQSFHHnlUK09NKg647FI10wMPPMBLL73Ec889x89+9jPef/99tm3bxrXXXsvtt9/OlVde2aHjfP755zz88MN8//33lJeX8+abbzJ//vzYel3XufPOO3nmmWdoaGhg5syZPPnkk4wcObLDtopqJkFPoes6nvogaTlWcgY5OvXH7XUFqd7eiMEoY+pC3k0oolHVGKTSHSCkamRYTRhFXkyfpiermQR9k+bGf7GONzotGv/tHsLZ3OG4vZBVtLuwpuvRrshxHHCZ0GqmW265BU3TOProo/H5fBx++OGYzWZuuOGGDgsZAK/Xy6RJk7jwwgs55ZRTWq1/6KGHePzxx3nhhRcoLi7m9ttvZ86cOaxbt67VmxYIehtJkrA5TTTWBbCldXzUQcAbpnaXF0mm00JG1XTqvCEqXH48wQgOs5F0MYJAIBC0QbNAYQ+B0pnxDbIkxYRPsg+47FafmVAoxObNm/F4PBxwwAE4HI6uGyJJLTwzuq5TUFDA9ddfzw033ABEq6Xy8/NZsGABZ555ZoeOKzwzu/E3hijf7GLwuCwMCZgA3VfxN4YwmBTyi9Mw7CdPJRSIUL29kZA/gr0TlUttjSBwWETn3v6E8MwkP6+99i+uvqp11RNEK5++W76ydw3qBFEvTlOeTbMq2HPK+B7jG5qFj67r6Hp0vWKUuxQuT3ifGQCTycQBBxzQnUO0S0lJCRUVFRxzzDGxZenp6UyfPp2vv/66w2JGECUciLDk5Q146oPs2ljPrNNHJlxJ9xUsjmgysKsmQPbAtks/IVqCXVfuJeiNYM/seC8Zf1ilwuWnujHapTPLZhadewWCJOS4405g6tSD21y3d9l0stE8SHNPmr04mq6j6hD9p7UXR1M1kIn7+JXO0CUxEwgE+Otf/8rixYupqqqK1bE3s2LFim4bVlFRAUB+fn6L5fn5+bF1bREMBmN9byCq7Po7uq7z3XuleOqjn0vFVjdrPtvFxCMHJdiyvoEkSVjsRhpr/Ngcxja792qqRl25B29DEEcHS7DDqkZNY5AKd5BAOEK61YQpgRcLgUCwb5xOJ06nM9FmxI19D+HUY0M4dU1HlyE+7T67RpfEzEUXXcRHH33EaaedxrRp05Iqq/mBBx7g7rvvTrQZScXGZZXs2tiAJEuMnp7PT19XsOGbCjLyrAwel7
3/Awj2i9GsEApEaKj0YbIaWjyh6LpOfZUfd00AW7ppvx4xXYc6b4gy1+4RBLlOEVYQCATJwd5enIim0/WElfjQJTHz7rvv8v777zNz5sx42xNjQFNL6crKSgYOHBhbXllZyeTJk9vd79Zbb+W6666LvXa73RQVFfWYnclO9fZG1iyO9hA48GdFDJ+Sh67Dhm8qWP7+NpzZFjIHtB8aEXQcm9OEpyFIY12gxagDd40fV6UPq3P/JdiN/ggVbjGCQCAQCDpDl3zWhYWFPe5KKy4uZsCAASxatCi2zO128+233zJjxox29zObzaSlpbX46q/4PSG+fmsLug6Dx2Ux7MBoW+8JRxQyYFgaakRj6etbCHh7bgp0f0JqHnVQ5Y99pt6GIPUVPkwWZZ9J14GwyvZaHz9Vuqn3hciwmki3GYWQEQgEgg7QJTHz6KOPcvPNN7Nt27Zundzj8bBy5UpWrlwJRJN+V65cyfbt25EkiWuuuYb77ruPt99+mzVr1nDuuedSUFDQohdNoti5oZ6Pn1vHT1+XJ9qUNtFUjW/e3ErQGyEt18pBxw6JuQUlWWL6ScNwZJnxu0N8/eaWaAKXoNuYLAY0VcNV5cPnDlFb5kWWpXZLsCOaTqU7wE8Vjeyq92E1KmTZxSwlgUAg6AxdCjNNnTqVQCDAsGHDsNlsrbK06+rqOnSc5cuXc+SRR8ZeN4eHzjvvPBYsWMBNN92E1+vl17/+NQ0NDcyaNYsPPvggKXrMhHwRdm2oJyPfmmhT2mTNkl3U7PRgMCscesrwVl4Bk8XAzNNGsOiFn6jZ4WHlxzuYcuyQBFnbt7A6jXgbgtE24BG1zf4zug4N/hAVrgAN/hBWo4EcpwgpCQQCQVfokpg566yz2LVrF/fffz/5+fldTgCePXs2+2pzI0kS99xzD/fcc0+Xjt+TWJ1RARfyqwm2pDU7f6pj47JKAKYdPxRnVtviLy3byvR5xSz972a2/FBNRr4tFooSdB1ZiXb1DYdUbGmt8/vFCAKBoGvMPfZnfPnl5wAs/WoZEydOarH+pZde5Jabb2DnrqpEmNcpLr30YlyuBl599b/tbqPrOlddeRkLF75JfX19m+95f59Jf6FLYuarr77i66+/ZtKk/vmhAbHy26A/kmBLWtJYG+C790oBGD09n8LRmfvcvmBkBuOPKGTtZ7tY8dF20nIs5BT1ndLCRGGyGlqFlkIRjcrGAFXuoBhBIBB0kfPPv5Df334n2dk5bNtWyvhxo2n0dH3ga2cZd8AoLrv8Ci6//KoeP9fHH3/Iyy//k/f/9zHFxcVkZ+dw6aUXM2TwEG773e0AvPzKa5SUbGX2ET1XkJMKdOl5cMyYMfj9/njbklJYm5641bCGGk6OfJNISOWrNzYTCWnkFDkYP7tjfWTGzBjAoDGZ6JrOV29swecO9bCl/QtV06luDLKhws3OOh8mRSbXYRZCRiDoAjabjfz8ARgM3er5mjBUVW3Vm609SrZuZcCAgRxyyIx233NWVhY5OTnxNjPl6JKY+eMf/8j111/PkiVLqK2txe12t/jqD5gsCrIhejMK+BJfDaTrOsv/tw13TQCL3ciM+cORO9jhV5IkDj5+KOl5VoK+CF+9vjlpBFoqo+vg8ofZXOVhc7WHiAY5DgtWMUpCIOhR3nlnIZMnHUBOdhrzTzqenTt3tFj/7rtvM2vmdHKy05gwfjQP3H8fkUjUy67rOvf/4V7GjhlBdpaTkSOGcuMN1wLRkM727du45eYbcTrMOB37H0ny0ksvMqgwj/fee4epB00iO8vJjh3bY+sfuP8+hg4ppGBgDldfdTmhUPRh8tJLL+aGG65lx47tOB1mxh0wKl4fT5+kS9L22GOPBeDoo49usVzXdSRJQlWTL48k3kiShNVuxOsKRdvTd3DIYE+xZUU1O9bVIUlwyMnDsDg61zrbYFKYeeoIPlmwnvoKH8v/V8q0ecVJ1RAxlQiEVcqbRhBIQJbVJEYQCJIWXdchUQ
8wRjmu1xmfz8cjDz/I039/DpPJxHXXXsX555/DJ58sAWDp0i+59NcX8dDDf+LQQ2dSUrKVq668DIBbb/s9C996k7/97XGeX/BPxo49gMrKStasWQ1EQzqHzjiYCy64iPMvuLBTNv35z4/yxN+eIisri9zcPAA+W7IYi9nC//73Mdu2l/Lb3/yarKxs7rzrHh566FGGFQ/j+ef/wWefL0WWxUPQvuiSmFm8eHG87UhJLI4mMZNgz0ztLg8rP4k+eUw8ahC5Xcx5sWeYmXHyMD7/10a2/1hHRr6N0dMHxNPUfkFjIEJprRdPICxGEAhSg7CG68HlCTl1+s1ToYveyiFDhrbKlwmHwzzy6GMcfPA0AJ56+lmmHjSJ5cu/Y+rUg/njA/dx7XU3cvbZ5wBQXDyM399+F7f//jZuve337Ni5nbz8fI488miMRiNFRYNj85aysrJQFAWH00F+fsevjeFwmD//+XEmTJjYYrnRZOL/nvw7NpuNsQccwO9/fwe///2t3H7HXaSnp+NwOlAUpcW5nn762S59Vn2dLomZI444It52pCQWhwnwEvAmLgk46Avz9Ztb0DWdQWMyGXlw/v532gd5Q9KYfMxgfvh4O6sX7yQ918qAYelxsrbvU+sJsb3OR1jVRPdegSABGAwGDjpoauz16NFjyMjIYMOGn5g69WDWrFnDN998zSMP/zG2jaqqBAIBfD4fJ598Kv/3tyeYMH4Mx/zs5/z858dy3HHHdytHx2QyMX78hFbLJ4yfiM22u1v4tOmH4PF42LlzB4MHi1YZnaHDP53Vq1czfvx4ZFlm9erV+9x24sSJ+1zfV7A2hXIS5ZnRNZ1vFm7F3xjGmWVh6nFD4+KuHX5QLvWVPkpX1/DNwq0cc95YHO2Udwui6DpUuPzsqPdjVGSy7IkcuSYQdBKjHPWQJOjcvYnX6+G2393OiSfOb7XOYrEwaFARK35Yw+LFi1j86SKuu/Yq/vKXP/HBB590efK11WoVIfsepsNiZvLkyVRUVJCXl8fkyZORJKnNHjH9JWcGiOWlJMoz8+MXZVSVNqIYZWacMhyjOT4xVUmSmDJnMO4aP3VlXpa+vpmjzh0bt+P3NcKqzs56LxWuAA6zUST4ClIOSZK6HOpJNiKRCCtWfB8LDW3cuIGGhgZGjx4DwKTJB7Jp0yaGDx/R7jGsVivHHXcCxx13Apf8+jccNGUiP/64lsmTD8RoMsbtHrdm7Wr8fj9Wa7T56nfLvsXhcDBoUP+dJ9hVOixmSkpKyM3NjX0v2C1mggmYbVS2qYH1X0VHKUydO4T03Ph2IlYMMoeeOpxPnl+PuybAsndKOPTU4eLpYi+aZyrVeINk2kwYFZEfIxAkEqPRyI03XMtDD/8Jg8HA9ddfw8HTpsfEzS233Mbpp51M0aAi5s8/BUmWWLtmDevW/cgdd97NSy+9iKqqHDx1Glabldde/RdWq5WiosEADBk8hKVLv+S0087AZDJ3qyw6HApx+WWXctNNt7Jteyl/+MO9/PrS3yKLLpqdpsOf2JAhu2f7bNu2jcLCQoYMGdLiq7CwsNvzmlKJ5jBTb5dme+qDLHs3KihHHJTH4HHZPXIeq8PEzFOHIysSZZsaWPdFWY+cJ1Vp9EfYVOWh1hckx24RQkYgSAJsNhvXXHsDF114Lj87ZjYOu50XXngptv6YY37Of/77Jos+/YQjjjiUo486nL/97XGKBkfFSnp6OgsWPMfPfjabGYdMZfGST/n3v98gOzt6nf3d7+9k+7ZtTJwwluKhhd2y9YjZRzJ8+AjmzDma88/7Fccddzy33XZ7t47ZX5H0fc0TaAdFUSgvLycvL6/F8traWvLy8pIqzOR2u0lPT8flcsV9gvbG7yr4+B/rSM+z8vOLxsX12O2hhjU+/edPNFT6yCqwc+SvRiP38E20dHVNrKvwjFOGM2g/XYX7A3sm+mbaTCLRV9DjSESwGt0UDR6M2dw/c9jmHvszJk6cyIMPPZpoU5
KK5k7IiRpnEAlrGIwyji7kCgYCAUpKSiguLm41d7Ez9+8u3QWb+8nsTW1tLXa7vSuHTEl2h5l6L2fmh4+301Dpw2Q1MOPk4T0uZACGTsxh5MFR4brsnRJcVb4eP2eyoutQ3uBnS7UHgCy7EDICQW/yzDNPMyA/ix/Xrk20KUnBKSfPY9rBBybajITTqVqzU045BYgmi51//vmYzbsbxamqyurVqzn00EPja2ESs2c1U3sCL56UrKqmZFUNSHDIScPaHGLYU0w8qghXtZ+q0kaWvr6Zo887ALMtNduJdxWR6CsQJJZ/PLcgNkqnOYcl0Zxy8jy++mppm+uuv+Fmbrzx5h49/xN/eyrpPpNE0Km7UXp6tN+Irus4nc5YBjZE6+gPOeQQLrnkkvhamMRY7FExo+sQ8kcw27pWttcR6iu8rPgw2gJ7/OGF5BfHN2S2P2RZ4pCThrPohXV4G0J8s3ALh/1iVIdHJqQ6/rDKDpHoKxAklIKC7uWo9AR7iom9yczM6vHzJ+Nnkgg6JWaef/55AIYOHcoNN9yw35DS0qVLmTp1agsPTl9CVmSMZoVwUCXo6zkxE/JH+OqNLWiqzsAR6YyZkZiuvGabgZmnjmDRiz9RVdrI6k93MPmYvv8k0OiPUFrnxRMMk2O3IAoNBAJBM0JMJAdduizfeeedHcqNmTt3Lrt27erKKVIGkzWqBwM9VJ6t6zrL3inB5wphzzAx7YTEzktKz7MxbV4xAJu+q6J0dU3CbOkNaj0hNlU3EgipQsgIkgAdOl2yIRAkL12oQWqTHr00x8vIZMZkjeZN9FQS8E9flVO+xYWsSMw4eURMPCWSQaMzOWDmQAC+/2AbdWWeBFsUf7Q9En0lJDJFoq8gwejIaDoEgm2HNASCVMTnixaUdLW7cjOJvzOmOOZmz0wP9JqpLHGx9vNob5cpc4aQOcC2nz16jwMOK6Chyk/ZpgaWvr6FYy4Yi9XRN1r4xxJ93UEcJoNI9BUkCTLhiIma6moALGYrCIEtSALUiI6qSRiUjk9e13Udn89HVVUVGRkZKEr3rrNCzHSTZk9JvD0zPneIbxZGG+MVT8qheFLXu0z2BJIkMW1eMZ++GO0Q/NXrW5h99miUFJ8Q3ZzoW+sNkiESfQVJhoqDQMhDZWUV0dx7oWYEiUfVNBRFxtyFB7+MjAwGDOh+HqgQM93E1AOeGTWi8fWbWwj5I2Tk2zjwZ8mZZGs0K8w8dQSfvLCeujIvP3y0nYPmDknZkQexRN9AmGyRHyNISiRUnKgRDYmOPwULBD1JXW2AnFwLY0bn7X/jPTAajd32yDTTo2ImVW9qnaG510o8PTOrFu2grsyL0aIw4+ThKL08VbYzOLIsHHLSML749yZKVtWQkWdjxNTO/UInGl2HOm+IbXVeVFUnx2ER+TGCJEdG79mUR4Ggw0RUBSRDqw6+vYlIAO4msTBTnDwz29bWsmVFNCY+fV4xjszkL2sfMCydiUcOAmDlJ9up2uZOsEUdRyT6CgQCQerTJTFz1FFH0dDQ0Gq52+3mqKOOir1ubGxk2LBhXTYuFTBboi6yQBw8M65qP99/EB3UOfbQgQwckdHtY/YWo6blM3hcFroOX7+5FW9DMNEm7ZewqrOt1sO2eh9Wo4LTIqKuAoFAkIp0ScwsWbKEUCjUankgEOCLL77otlGpRLw8M+GgyldvbEYNa+QNdTLusIJ4mNdrSJLE1LlDyRxgI+SPsPT1zURCyTNwdG/8YZWSag8VrgAZVjGaQCAQCFKZTj2Krl69Ovb9unXrqKioiL1WVZUPPviAwsL+1Q3R1JQzEwlpRMIqBmPnb4q6rvPde6V46oJYnUYOOWkYUgqOCVCMMoeeOoJPnl+Hq8rPd++Vcsj8YUmXOyUSfQUCgaBv0SkxM3nyZCRJQpKkFuGkZqxWK3/961/jZlwqYDDKyIqEpuoEfREM6Z0XM5u+q2TXhnokWWLGycN7dMZTT2NLM3HoKcNZ8spGdv5Uz0
9fVzD20IGJNguIJvrWeoNsr/OJRF+BQCDoQ3RKzJSUlKDrOsOGDWPZsmXk5ubG1plMJvLy8uJWZpUqSJKE2W7E7w4R9Iaxp3cuYbd6eyOrP90JwORjisgudPSEmb1KTpGTKT8fzPcfbGPtZ7vIyLMmPP9H06Giwc/OBj9GRSbTnrqCUSAQCAQt6ZSYGTJkCACaJvob7InFZsDvDnU6CdjvCfHNW1vRdRh8QBbDp+Tuf6cUYdiBuTRU+tjyQzXfvF3C0eeNIS3buv8dewDR0VcgEAj6Nh0WM2+//TZz587FaDTy9ttv73PbE088sduGpRLmpqf8oK/jYkZTNb55aysBb5i0HEtKN5trj8k/K8JV46dmh4el/93M0eeNxdTLFUOio69AIBD0fTp8Z5k/fz4VFRXk5eUxf/78dreTJAlVTd4qlp7A0pQE7PH4qfRVkmfN328uxprPdlGzw4PBJHPoKSMw9EFvgazIzDh5OJ8sWI+nLsi3C7cy6/SRvZbcLBJ9BQKBoH/Q4cu7pmnk5eXFvm/vq78JGdjtmfF5AlR7q3GHXPvcfueGejZ+WwnAwccX48xOXNfEnsZiNzLz1OEoBpmKrW7Wfr6rx8+p61DjCbKpupFASCXHIYSMQCAQ9GU6fInPysqipqYGgAsvvJDGxsYeM2pPVFXl9ttvp7i4GKvVyvDhw7n33nuTqruwxR71zIR8Kp6IhypfFZredl5RY22A796NDpAcNS2fQWMye83ORJE5wM7U46L5Vj99XcH2dXU9dq7mjr5bq72io69AIBD0EzosZkKhEG53tE39Cy+8QCAQ6DGj9uTBBx/kySef5IknnmD9+vU8+OCDPPTQQ0lVAt5cSh3yqUhINAQaqA/Ut9ouElL56s0tREIaOUUOJszuPz15Bo/LZvQh0cmoy98rpb7CF/dzxDr61omOvgKBQNCf6PDVfsaMGcyfP5+DDjoIXde56qqrsFrbrk557rnn4mbgV199xUknncTxxx8PwNChQ/nXv/7FsmXL4naO7tI8bDLkU5ElGYNipMpXRbo5HYMcXafrOt9/sA13tR+z3cAh84ch97Nk1AlHFOKq8lGx1c1Xr2/m6PPHYolTiXTLRF8zRkW4Y/oDvpoAkUAEW64Vg7nv5Z0JBIKO0eG76UsvvcRxxx2Hx+NBkiRcLhf19fVtfsWTQw89lEWLFrFx40YAVq1axZdffsncuXPjep7uYNmzmkmXcBodNIYaqfXXxrbZsqKa7T/WIUkwY/5wrA5TosxNGJIsMf2kYTgyzfjcIb5+cwua2v0y/0Z/hM1VHmq9IbLtFiFk+gG6prPzm0o2LCxly4c7WfPSJta/vpXtX5ZTu8lF0B1KqlC0QCDoWTrsmcnPz+ePf/wjAMXFxfzzn/8kOzu7xwxr5pZbbsHtdjNmzBgURUFVVf7whz9w9tlnt7l9MBgkGNw95LA5NNaTmJtyZsJ+DV3XkSUZq9Ea8854KyOs/GQHABOOHETuYGeP25SsmCwGZp42gkUvrKdmh4eVn+xgypwhXTpW646+ZpEf0w8IecOULi7DW+kHwOQwEvKECTSECDSEqN0QTcA3WBTs+dboV54VW46l33lDBYL+QpeSCkpKSjq03YQJE3j//fcpKirqymkA+Pe//83LL7/MK6+8wrhx41i5ciXXXHMNBQUFnHfeea22f+CBB7j77ru7fL6uYG4aNokOWgiwg81go9ZfS3ldFT++WY+u6RSOzmDUtPxetS0ZScuxMv3EYSz972a2rKgmI9/GsMmdaxgoOvr2TxrLvJQuLiMSUFFMMoMPG0jGUCdhfwRvpR9vlR9vpb8p/KTi2ubBtc0DgKRI2HIs2PN2CxyjVeRVCQR9AUnvQV+s0+lk1apVDBs2rMvHKCoq4pZbbuHyyy+PLbvvvvt46aWX+Omnn1pt35ZnpqioCJfLRVpaWpftaAuvK0hliQtHpoWFf/6BUECl8GQYMDALAH/IT8n7AfxlOo5MM8dccA
BGEdePsX5pGWs/L0OSJWafPYqcQR3zWIVVjZ31PipcQZwWA5YuDPcUpBa6rlO5spbyFdGKSmuWmeKjCzGntR2u1SIavtpAVOA0iZxIoHXbCHOaEXu+LSZwLBmmPte8UiDoaaqr/AwYaGX6lIK4HtftdpOent6h+3fSP5b4fD7kvZqEKIrS7kgFs9mM2dy5+UjxwGw3EgqoaHsUedWv1PGX6cgGmHHKCCFk9mLMoQNpqPKz86d6vnpjC8ecfwC2dm5OzYhE3/5HJKCy7bMy3Du9AGSPSmfQjHxkQ/shI9kg48i34ci3AVExFHSH8Vb6Yt6bQEOIoDtM0O2iblM0NKWY5N2em3wb9lzLPs8jEAiSg6QXM/PmzeMPf/gDgwcPZty4cfzwww/86U9/4sILL0y0aS2w2A001oIWiN5cXaUhypdHlU32oUB6EEjMbKJkRZIkDj5+KI11AVxVfr56fTNH/moMirHtm8fujr4R0dG3n+Ct9lOyaBdhbwRJkSg6NJ/sURmdPo4kSVjSTVjSTbH9I0E1Jmy8VdEvNaTh3umNCScksGVbYrk3jjwrRhHSFAiSjqQXM3/961+5/fbbueyyy6iqqqKgoIBLL72UO+64I9GmtaC514zqlwi6VUoWRS+GuePNpI0MU+WrwmlyokjCO7MnBpPCzFNH8MmC9dRX+Fj+v1KmzStu4eoXib79D13XqVnfwK5vK9G1aDio+OhCrFnx65ZtMCukFzlIL4pOqtc1fXdoqknkhH0RfDUBfDUBqn+MVmqaHMZYzo0934o109xrIzoEAkHbJL2YcTqdPPbYYzz22GOJNmWfNPeaiXh1tnzgQQ3q2PIUBs20oUsa9YF6GgINZFt7vgIs1bBnmJlx8jA+/9dGtv9YR+YAG6OmRRvsiUTf/oca1tj+ZTkNW6NdxjOGOhl82ACUHp5fJskS9lwr9tyoB1XXdcKeCJ4qXyz3xl8fJOQJE/KEqd8SrZSUjTL2XEs0LJVvxZ5r6XFbBQJBS5JezKQKzb1mXD/q6JqKYpEYPseBrEiAgtlgptJXSZo5HaMsPva9yRuSxqRjilj58Q5WfbqT9FwrWYOdItG3n+GvD1KyaBdBVwgkKJyWR+64zIQk5UqShMlpJMuZTtbwdADUkIq3ujmx2Ie3OoAW1mgs89FY1tTVWgJrpnmP0JQNo8MgEosFgh6kS3fVHTt2dKjc+umnnyY/v3+UIjcPm2weyTTsZw5Mzt03X7vRTp2/jlp/DQPsAxJhYtIz4qA8Gip9lK6u5eu3tjLs+MF4FV0k+vYT6ra42PFlBVpEx2gzMPSoglgCb7KgmBTSCu2kFdqBaGjKXx9sEZoKecL464L464LUrG8AwGgztAhN2bItIjQlEMSRLomZoUOHMmvWLH71q19x2mmnkZnZ9rDEX/7yl90yLpUw79GvomCalbSiluEQWZKxGW1U+arIMGdgMfTdSdldRZIkpswZQn2VH1eFjy0f72TMiUOFkOnjaKrGrm+rYjd+R4GNobMLUqIHjCRL2LIt2LIt5B4QvQ6GvWE8zYnFlX58tQHCvggNJY00lERDZ7JBwpZrbdHzRoxjEAi6TpeuFsuXL+eVV17hnnvu4corr+TYY4/lV7/6FfPmzUtIWXQykFVoQ7GCvUBmwEFtCxWb0Uatr5Zqfw1FzkG9bGHyo+tQHwiTMSMXz/92EnGH2fFFOcVHFwoXfR8l2Bii9NMyfDXRyr8Bk7MZcGBOSnstjHYjmcVGMoujfTG0iNYUmvLFPDhqSMNT7sNTvnvgqiXDtLskPM+KOc0ofu8Fgg7SraZ5uq6zZMkSXnnlFV5//XU0TeOUU06J66DJ7tKZpjudZc+meREtwrqadRgUwz69LkE1RCDiZ2TmSBxGR1ztSWU0Hcob/OxqSvSV3WE2vbcdXdMZcGA2A6d0rkOwIPlxbfew7bMy1JCGYpYZckRBrLKoL6PrOoGG0B6hKR9Bd7jVdrFxDM2hKTGOQZCkJEPTvL
h1AF6xYgUXXXQRq1evRlVbd9pMFL0lZsJamHW16zApZszKvhu/1frryLZkUZxeLJ68aL+jb+0mF9s/Lweg+OhCMob235lWfQld0ylfUUPlquggVluuheKjCjE5+m+lWtgf2d3zpmkcg661vDRLctM4hnwxjkGQXCSDmOnWX8LOnTt55ZVXeOWVV1i7di0zZszgb3/7W3cOmbJoTZm/HdEmaWYndYE6sqxZZJgzetawJMcfVtle66OujY6+2SPT8ddG+3ts+6wMc9pQrFn9M4zZVwj7I5QuLouFV3IOyKBwWl6/9zgYrQYyhjjJGBIV7Jqq4atp2fMmElBjzf1YE91PjGMQCKJ0Scw8/fTTvPLKKyxdupQxY8Zw9tlns3DhQoYM6dr0476Arutouo7E/i8kRtmILMv9vpGe2x9h2346+hZOy8NfF8RT7mPrJzsZfdJQkSiZongqfJQsLiPiiyAbJAbPGkjm8Ph6S/sKstLOOIamsJQYxyAQtKRLYua+++7jrLPO4vHHH2fSpEnxtikliUbrdGQ6duFwGtOoD9RRH6gnx5rTs8YlGbGOvrU+VG3fHX0lWaL4qEI2vF1KqDFM6ae7GD6nKKUTRPsbuq5TtbaOsu+qQY8muhYfXYglQ3jZOkqLcQwjoz1vxDgGgWA3XRIz27dvF67MvdDQmkJNHftcFFmONtLzVpJuTsco948LzN6Jvh3p6GuwKAw7ppCN72yjsczHru+qGDS9f/QvSnUiQZXtX5Tj2uYBIHN4GkUzB7Q7f0vQcbo+jsHQIjQlxjEI+gIdFjOrV6/u8EEnTpzYJWNSGZ2mMFMnrgkOo4Nafy01vloGOvp+I72wqrGzzkeFu/Mdfa1ZFoYcPpCST8uoXluPNcsSe0IVJCe+2gAli3YRagwjyRKDDskje0yGeBDqITo+jiFCyONuYxzD7tCUGMcgSDU6LGYmT56MJEk0Fz/t64KUTNVMvYWu66DryFLHnzglSYo20vNXkmHJwNqHG+ntK9G3o2QUpzFgcpCKlbXsWFoR7cuRKyaRJyO1GxvY8VUluqpjchgYelSh+Fn1Ml0bx1DbahyDPc+KySF63giSmw6LmZKSktj3P/zwAzfccAM33ngjM2bMAODrr7/m0Ucf5aGHHoq/lSmAqndNwO1upFdFkWNwn5wG3ZFE344yYEoO/rogru0eSj7ZxeiThmK0ifLUZEGLaOz4qjKWkJpWZGfIEQUiaTtJEOMYBH2VDt8F9qxUOv3003n88cc57rjjYssmTpxIUVERt99+O/Pnz4+rkamArtPRdJlWOMxOav21ZJqzcJr6TtOwziT6dhRJkhhyxEA2vrONQEOIkkW7GHFcUb8v7U0GAq4QJZ/uIlAXBAkGHpRL/sQs8USfxHR1HIOkSC0nhYtxDIIE06VH2jVr1lBcXNxqeXFxMevWreu2UamI3jxhsguYFRPekIcqXyUOo71PXPy7kujbURSTQvExg9j4dineKj87v6qkaNaAPvG5pSoNpY1s+7wcLaxhsCgMPbIAZ4E90WYJusA+xzE0iRw1pOGp8OOp8Mf2i41jyIvm3ohxDILepEtiZuzYsTzwwAM8++yzmEzRbrehUIgHHniAsWPHxtXAVEGnG64ZIM2cRn2gngaLi0xLRtzsSgTdSfTtKJZ0E0OPLGDLRzup3ejCuseTpaD30DWdXd9VUb02Wiljz7dSfGSBKP/tQ8gGGedAG86Bu3veBBpCe3Qsjo5jCDSECDSEqN0QDTEaLMoePW/EOAZBz9IlMfPUU08xb948Bg0aFKtcaq52evfdd+NnXQqhoUM3BkMYZAOKrFDpq8BpdmCQUjMPJB6Jvh0lbZCDgoNzKVtWzc5vKrFkmmMXXEHPE/KGKV1chrcy+nSeNyGLgqm5vZJLUekJsrrSzdqqRgIRjQyLkQyLoen/lt9bDLLwEMQRSZKwZpqxZprJGZ0B7DWOocqPrzpAJKDi2u7BtT1alt9iHEOTyBHjGATxok
u/SdOmTWPr1q28/PLL/PTTTwD84he/4Je//CV2e/90LUfDTN0bc+U0Oan311MfaCA3BRvpxTPRt6Pkjc/CXxukfoub0k93MfrEoZicwivQ0zSWeSldXEYkoCIbZYYcPrBHZ2f5wyo/VjeyprKR1ZVuKr2hDu9rVmQyLAbSm8RNpsVIekzsGGLLnGYDshA9XaLdcQx75N60GMfQRHQcgxV7nk2MYxB0iy7LYrvdzqxZsxg8eDChUPTCsmjRIgBOPPHE+FiXQui63p0oEwCy1NRIz1NBuikN034GViYLug41niA76uKX6NtRJEli8KwBBBqC+GuDbP1kJyNPGCKasvUQuq5TubKW8hU1AFizzBQfXYg5Lb6/q5quU9rgZ3Wlm9WVjWyq9aDu8aygSDAq28GEfCeZFiOuYJiGQISGQLjpK/p9IKIRVDUqvaH9CiBZgjTznt6d9r09JhEu2SctxjFMiP7ehBrDeCrbGscQpm5TtOdNy3EM0Z45YhyDoCN0Scxs3bqVk08+mTVr1sR6z+yppvtjn5loaXb37+B2oz3aSM9fQ4EjvhNIewJNh7IGP2UNfkwGmTRr73tFZIPMsGMGsWFhKf66INu/KGfokQXiCS/ORAIq2z4ri7XKzx6VzqAZ+XG72dT7w6ypioqXNZWNNIYiLdbn201MzE9jYn4aB+Q6sHYgFysQUfcSOS1FjysQoT4QpjEYQdNpWhcB/Ps8rs2okG42kGmNipt0c9uix2FSxO8h0YcOc5oJc1r3xjHY86yYRD6WoA26JGauvvpqiouLWbRoEcXFxXz77bfU1dVx/fXX88gjj8TbxpRA1bW4eCMkScJuslPtrybTnInVmLyNxnoj0bejmBxGio8uZPP/ttNQ0khldh0DJmUnzJ6+hrfaT8miXYS9ESRFoujQfLJHZXTrmCFVY0ONJypeqtxsdwVarLcaZA7IdTIx38nE/DTyHZ2f5WQxKAxwKAzYz76qpuMORoWNqw3Rs+f3YU3HF1bxhVXKPcF9HleRpHa9OxlN4a7msJehN+KySURb4xj8tYGo90aMYxB0ki6Jma+//ppPP/2UnJwcZFlGURRmzZrFAw88wFVXXcUPP/wQbzuTHk1TO9X9d19YDVZqfbVU+asYbBiSlI30ejPRt6M4BtgYNCOfHUsrKV9ejTXTTPrgvtO3JxHouk7N+gZ2fVuJrkVzHIYeVYgtu/PdqnVdp6wxGAsdra9pJLRH7EgCijNtTMx3MiEvjZHZdgy9dJNSZIlMq5HM/XgWdT0qZNoSOq5gmHp/9LUrGMYTUlF1nVp/mFp/eL82OExKk7CJip7d37cUQdY+mtAsyRK2XCu2VuMYdoemxDgGQXt0ScyoqorTGU30ysnJoaysjNGjRzNkyBA2bNgQVwNTBRUVKQ5hpmaclmgjvSxLFk5TzyVWdgW3P8K2Wi+eYO8l+naUnDGZ+GuD1PzUQOmSMkafOERMZ+4ialhjx5cV1G+N3jTShzoYctjATt0oPKEIP1Y1sropcXfvm3qGxdAUOnIyPi+NNHNyV7dEPacG7CYDhWn7FnRhVcMVjAqden849n0rERQIo+rgCal4Qio73IF9HtekSO3m9ewZ+kpL8YTm3eMYjGQNj/a8aTGOoSk0tc9xDE3eGzGOoe/TpSvH+PHjWbVqFcXFxUyfPp2HHnoIk8nE3//+d4YNGxZvG1MCVdOIpwvFJJvw4qXCW4ndaI+b16c7JDLRtzMUHpIfa9G+9ZNdjD5xiHhS6yT++iCln+4i0BACCQqn5ZE7LnO/NwRV09lS72vyvrjZUudrUeNnlCVG5zhioaOiNEufvckYFZkcm4kc276TozVdxxNSWwkdVyDcFPbaLYL8EY2QqlPlDVG1n4RmCUi3GEg3N1VxWXd/v3e4y5wiSbbtjmPYo2qq3XEMe/a8EeMY+hxdEjO///3v8XqjCVr33HMPJ5xwAocddhjZ2dm89tprcTUwVdDRkOPomQFIM6XREGzAFX
SRaUlsQ7hkSPTtKLIiUXx0IRsWlhJ0hShdUsawYwaJi1cHqdviYseXFWgRHaPNwNCjCqJVKe1Q4wvFQkdrqxrxhVsWABQ6LTHxMibHkTI3zt5CliTSzAbSzAYGp+87Ry4QUVuIm/byetzBCDq7E5q3ufad0Gw1yK3yeFrm9ERfO5MsobnFOIaxbYxjqPLjq2kax1DaSENpG+MYmkSOGMeQ2kh68xjsblJXV0dm5v6f3Hobt9tNeno6LpeLtLS0uB7b6wpSWeLCkWlhfe1PRLQwdlN8++y4gm4sioWRmSMwyIlxwSdTom9n8NUE2PjuNnRVJ39SNgVTcxNtUlKjqRq7vq2KPc06CmwMnV3QqrFZIKKyvtrDmqpo6KissWUSrN2oMD7PGQsfZe/HMyGIP80JzXvn8ewZ7ooujyY0dxRFklr16GmvjD1ZEppj4xj2yL1RQ63Hz1gyTHt4b8Q4hs5QXeVnwEAr06fEtwK3M/fvuN0ds7Ky4nWolEPXQUONa5ipGafJQZ2/jvpgPbnW3r8ZNyf61nqCZNqTI9G3o9hyLAw+bADblpRTuaoWa5aZzGHxFbR9hWBjiNJPy/DVRPM18idnM/DAHCQ52nphu8sfy3vZUOslorVM3B2ZbWdCk4AZnmVL6VyNvsCeCc1DM9rfTtd1/BGt3dL1Pb9vTmiu84ep62BC8955POnmluGuTGvPJzS3HMeQja7rBF2hpp43Ue9N0BXaPY5hYzvjGLItoudNEpPc2XYpgo4W7bXTA8eWJRmrwUqFp5I0UxpmpfeSWfdM9M1xJFeib0fJGp6OvzZI1Zo6tn1ejjnd1KVKnL6Ma7uHbZ+VoYY0FLPMkCMKkPLMfLWzPlY2He29spscm5EJeWlMGpDGuFwHdpO4lKQikiRhMyrYjAoFzn3/XUQ0LdaXpznUtbuUvaUAUpvygDwhlZ37scEoS+14d1q+TjMbUOIQKpYkCUuGGUvGPsYx1IhxDKmG+EnEAa1JzChyz4RfbEZbrJFeoaOwR86xJ6mS6NtRCqbm4q8P0rjTS8knOxl14lBxESKaPFm+oobKVbUASBlGNg418+rGnZQua5ljYVZkxubuTtwd6DALF3w/wyDLZNtM+w0barqOt0VCc8sqLlcwEgtx+SMaYU2n2hei2rf/hObdHZrbG08RXWcxdO5a3NY4Bn9NEE+VT4xjSBHEFT0ORMNMWo8Nh5QkCYfJQbW/mgxzBnZjz82/SqVE344iyRJDZxew8e1Sgu4wpZ/uYsTcwf06ITjsj7Dxk52EqqJhpdUWlU90P2rp7m2GpFujPV/y0xidbccoWvgLOoAsSTjNBpxmA0X7SWgONoW4msdRRHN62u7UrAOuYARXMMI2175tsBrkNnv07P29w9R2+bqsyLHwUutxDNHcGzGOIbkQYiYORIdMQjzGGbSHxWDB6/NR7a/GZrD3iKckVRN9O4LBrDDsZ4PY8PY2PBV+dn5bSdGMAYk2q1fxhVXWVTeyZUsDgzb7sWkSIXQ+tIX5yaSSZjYwIc/JhCbvS4Yl9YWsILkxG2TyHeb9dnfW9GhC877yeprDXUFVwx/R8HuCVOy3QzMx0dM8kiLTuvd4iqjw2d84Bl+1GMeQSFJCzOzatYubb76Z//3vf/h8PkaMGMHzzz/P1KlTE20aAJquoel6jyc9pjU10ss0Z5JuTo/rsZOxo2+8sWSYGTp7IFs/3kXNugZsWRaym2LmfZGWwxrdbKrxcmDAwBEBAzIStbLG2iIDkwZlck5+GkMyrCJxV5CUyNLuvJp90ZzQvL+RFA2BCI2hCKpOhxOa7UalbU9PnpGMwZlkm3Kx+FW02pAYx5AAkl7M1NfXM3PmTI488kj+97//kZuby6ZNm8jMTGzflT2JVrfryPSsO9EoR/+Qq3xVOE3OuDXSc/nDbK/14QklX0ffeJM+2MnAKTmUr6hhx1cVmDNM++yhkmrU+8Mx8bKmqhFPKNrzxaTDCT4To8JRb5s+0MKM2Y
UcYxNPiYK+w54JzQM7mNDceiRFZI/QV3RdRNPxhlW8YZVdjfu2oTmhOT1fYYBiZGBEJiugY/OoKB513+MY8mzY88Q4hq6Q9GLmwQcfpKioiOeffz62rLi4OIEWtaY5Abgnw0zNpJnSaAg00BBoIMvavXL4Vom+9tRO9O0o+ZOz8dcFaShtpGTRLkafNDRlXb97DmtcXelu1QrfapCZnmZnSrmGEtaQZInCQ/LIGZMhEhUF/ZqOJjTrelTItJXMvLe3xxdW90hohs17Tl83gDEdCiIyBapMYUSmUJUx7TWOQQd0pwFjthlHvpWcQQ6c6SLhfn8kvZh5++23mTNnDqeffjqfffYZhYWFXHbZZVxyySVtbh8MBgkGd8dJ3W53j9uoo6PFaWr2/lBkBYPBSKWvkjRzWpcb6TUn+u5q8GPuI4m+HUWSJAYfPpCAK0SgPkjJJ7sYefzglEjU03WdXY0B1jSJl/U1nnaHNU7MTyOrJkLZN5Xoqo7JYWDoUYXYc5N3ErtAkGxECzCiycKD9tOmKqRqrfJ46tsYT7EjGEbTQdIhR5OiwqZJ5GRoMlJjBLUxgqvUi+vbGjyyTo0JPA6ZsNOAkmki02ZsNZ7CmeLzuLpD0ouZrVu38uSTT3Lddddx22238d1333HVVVdhMpk477zzWm3/wAMPcPfdd/eqjbquo+v02vwkpzHaSK/WX0u+Pb/T+/flRN+Oohhlhv0sOvLAVxNg+9IKhhw+MCmffjyhCGubhjWuaWNYY6bFGEvaHZ/nJM1sQIto7Piqkl2bomUfaYPsDDmiAIOl//2sBYLewqTI5NnN5Nn3n9DcGIy08u5sD0T4qTGE4gpj92hkBXVyIxIOTcIRAAI61IQJl4QoVzTWGTR2GTTKFI2ADLJEk8DZY/hoi/EURjKbytpNfaw6MW7jDHoKk8nE1KlT+eqrr2LLrrrqKr777ju+/vrrVtu35ZkpKirq0XEGqi3IhtoNrA+spdg2nEHWoriep81zh72gw8jMkVgMHW8C5w+rbKvxUe/ru4m+naGxzMvmD3aADoXT88gbn/hO1tFhjd5Y6KitYY1j9hjWOGivYY0BV4iST3cRqAuCBAOn5JA/KTsphVpn0HWNiBYhoodj/4fVECEthI4em1q/97vc831Ht9lzi5Zb77k2ut8e++79eq89m88j7W1Bq5f7+jm0d/y992t19pav9/mj3vvzaHvH1odo/xwdfU/R/fb9+Xf4/PvZuu3z7euM+/7daesnsnu7dk/fbXyBMNVlXtwVPoI1AaT6MHKk9W27Ro4Km12KRplBo17W95n5YDMqe/Xoabthod24/3lcfWqcQU8xcOBADjjggBbLxo4dy+uvv97m9mazGbO597rkQjTMtD1Yyovb/4FdcXDP2D+SZoxvtdHe2Ay7G+kNcg7a7/aaDrWeILsa/PjDap9P9O0ozgI7hdPz2PVNFbuWVWHJNMcm8vYm1d5QLHH3x2rPPoc1js11tPtU1VDayLbPy9HCGgaLwtAjC3AW9P776SotBUuYiB4hrIYIaH4iahiVMKqmojU9g0kSKPvo76TvIQNbP7bpbXzX3hK93VV6O68kpBbr9r4d7L1Ob2c7pLZs78Ax97Nfq23bOVCrbfdlwD7EWIsl+xV4+xB1HRRq+xIfnXm1L6HW+u23fc69RbQk7f05ti+4JSSUPMjMAwkZXTehNuqEqzXCtTrhGg21USdHk8kJyUxq2k81gscO9RadcqPGLkmjIaTRGNKIaNFWDb4OJDQbZEgzK6SZ5ab/FdItTa8t0de6N4IlmNgHwaQXMzNnzmTDhg0tlm3cuJEhQ4YkyKLW6LqOOxzNzfGqHl7d+U9+XXxFj55zdyO9GjIsGTiMjna39YYilDUEqGkMYDEayN1PT4f+Ru4Bmfhrg9RtclH6aTQh2JzWs8MRm4c1Nntfyj2thzVOyHcyIa9jwxp1Tafsu2qq1tZF98+3MvTIgqRMbG5bsAQJaIGYYIloKv
oegsUgGZAlA0bJgsWo9FpIV9A2e4skvQ05uOfa9nbcl4jc5xH3EVDYty2dtWffx9X3XtfGAVrvp7d/Ir1j7x8b6EN05CFgBrQAaLUSWp0c/b9eQglLpDdAOhJDUUCWkTJ05CyVSIaG16HRCHhCTV/Bvf4PQSACEQ3q/Cp1fhVov4R97M5G5swYvS+re5SkFzPXXnsthx56KPfffz9nnHEGy5Yt4+9//zt///vfE21aDE3XCGq7q0i+a/iWaQ0zmJxxUI+e12Kw4Av7qPbVYE9ztHooUDWd6sYgZS4/wYhGps2MoZ+HldpCkiSKZuYTaAjiqw6w9eOdjJo3JK7lkfsb1ihLMCLLHvO+DMvs+LDGkDdM6eIyvJXRyom8CVkUTM1NaO8KXdcIa2FUPRITLKFIgKAebCVYdKJ9RAySsodg6b+JjKnA3j+aDoeZxI+0ZzACTmBo9KWu6kTqo16bcI1GqFpDD0rodRJqnYwEOIB0h4QxR8KYI2McJKOkSS1CSmFVxxPScAd13EGNxqBOY1CjMRhdtuf3aebEPmAkvZg5+OCDefPNN7n11lu55557KC4u5rHHHuPss89OtGkxNHSCWvTJWkZGQ+OlHQsY5RiDzdCzLn6n2Umdv5ZMSwYZ5ozY8sZAhPIGP7W+IDbhjdkvsiIz7OhCfnp7G4GGENs+L6f46MJu5Zm4AmHWVEXFy5rKRlzBvYc1mmLipavDGhvLvJQuLiMSUJGNMkMOH0jGUGeXbe4MmqYS0SNtCpawGkIj0iRYNJpzSoRgEQh6HknZLVIg+jClenTCNbsFjuqKLlM9OoHSaBd7yUhU2OTI0f2zZTKtCpn7KYAsr6onPy+x4eykFzMAJ5xwAieccEKizWgXXdcINYmZqZnT2eYroTJYwX/K/sV5gy/u0XMbZSOSLMUa6WmaTLU7QLkrgKrrZFnNKMIb0yGMdiPDji5k03vbcW3zULGyloEH5nT6OBtqPLy0ehdb6n0tlpsVmQNyHUzMj4aOBnRjWKOu61SuqqV8RQ3oYM0yU3x0YdzDY82CJaKHUbXIbsGiBQhr4SbBEol5x5sFiyIZhWARCJIESZIwOCUMTrAWRz3OWmi3sAnX6IRrNfQwhMo1QuVNI3okMGRKewgcGcXWRjaUBKYE32dSQswkO7qux8JMTkMa5w2+mIc23ceXtZ8xLXMGY53jevT8TmMaDYF6tjdUEwjYaPCFcJiNWEUXyU5jz7NSNDOf7V9UULGiBmumucOejoim8d91FbyzoTIW724e1jgxP41RcRrWGAmobPusLDb/JWtUOkUz8rvcJycqWKKelWbBEogECDcJFrXJ+9JCsMgGFISHRSBIVWSThLlAwVzQ1BVc04k0NAmc6qjI0fwQqdOJ1Kn4N0aLEmRbk/cmNypuDOnJ8bcvxEwciObMRMfXW2QLIx2jOTLnGBbXfMKL2//BXWPux6x0vHy68+eHBp/O9rpSCizFZNvtolKpG2SPysBfG6R6XT3bPi/HnG7CmrnvMN1Ot5//+24bpQ3RvJXDh2Txi3EFZMa5GaG32k/Jol2EvREkRaLo0HyyR2Xsdz9VU1H3ECxhPUwwEiSsBQhpYbRWgkXGICsoGDArVmRJEYJFIOjDSLKEMUvCmCXDqOgy1bun90Yj0qCj+SC4XSO4vSk0ZQApzYSnOAyHJc5+IWbigKqrBPWoZ8bSJFpOKTiDVa4fqAlV81b5f/nFoF/F/8Q6uANhqhuDeAIyuuxFNniR5dQpxU1WCqfn4a8P4in3sfXjnYw+aSgGc2tPl6brfLSlmn+tKSOs6ThMChdPGcy0woy42qPrOjXrG9j1bSW6BuY0I0OPKsSWvVsk7ylYImqECFHBEmoOCcUES1NPlj0Ei1EIFoFAsBeKXUKxK1iGNIWmwjqRWq1F7o0eAalOIeTQEmqrEDNxQEWN5cxY5OjNxaJYOWfwhfxly8Msqv6IqZmHMNw+Im7nDEU0ajxB6r1hJAky7RaCqk5dqA
anKQ1TD3qC+gOSLFF8VAEbFm4j1Bim9NNdDJ9T1KJCqM4f4unl21lTFW3UMCnfya8PGhJ3b4wa1tjxZQX1W6Pl/84hNvJmZBAxBKgLNhKMBAiqQSJ6k2DRIsQmhQnBIhAI4oRslDANUDANiL7WNR3VrVNT4sFW0DuFB+0hxEwc0LTd1UwWZXfa9/i0iczImsXXdV/ywvZnuX30vbHJ111F15q8MZ4AvqCKw2zEaIjenCwGK65gHQ3BWvJshd06jwAMFgPDflbIxne20Vjmo+y7agqn5wHwzc56/rFiB96wikmROHtCIccMy4lLl121OYdFi+CvD1DxWT1htwoSmCfoREa42BWsRQ82dayVZJQ9BYtBCBaBQNDzSLKEIUOCQSqWAYnN0RRiJg6oWmS3mJFbekR+UXg2a92rKQ/s4v3Ktzlp4KldPk8wrFHjCVDnDWOUZTJtplZ9G6wGB/WhOpymDKw9XBbeH7BmWRh8+EBKPy2jam0dcrqRNxtdfLm9HoBhmTYuO3gIBc7OecKaE20jWrSsWdUjBNVmD0sIVVcJb9MJ/2AEVQKLjvUQFVOOAUUWHhaBQCDYEyFm4oCGFhMzeyf62g0Ozh50Hk+V/pX/VbzDQRkHM8g6uHPH18AVCFHlDhKKqDjNJpR2RLBJMRGIeKkLVlOg2FJ+Hk8ykFmchn9ykMqVtexcWsEmRxDJAPPHDODksQMwtNGcTtdB05sFS9TLsluwBKKlzrqKqkVi7eLlZg+LaiC0WiG8JZqNa8yXSZ9hRLaIn6VAIBC0hRAzcUDVNIJqUwKw3Lq70JSMgzkwfSo/uJazYNuz3Dr6ThSpYy45f0ilxhOkwRvCZFDI2E9bewCb0Yk7VE+aMROnqWdnRPUHwqrGYkMQk0FlREThVL+ZnGMKGVFgQ9WDhCKRFp6WkBogpAWjYkWPoGrqXoLFgAEFo2JCMRhadFNVvTqupSEidVEhYztAwT7ekNBuvgKBQJDsCDHTTXQdNFQCTX1mrEprMSNJEr8sOo8NnnVs85fwSdUHzMk/fp/HVTWdBm80Nyas6jitJjraosQgG5BQqA9WYzc4kGXRb6az6LpORIuwzeXl6eW72NUYwmSHSwJW7EGJ0PIyth6uocsqWlPCLUSjfrKsoEgGFGSMiq2VYGmPYJmK+5sweggkE6QdYoz1gBAIBAJB+wgx0000XUfTds9msrRTRZRhzOCMwrNZsP0ZFpa/zuT0g8i3DGhzW19IpboxiNsfwmxQyLB1/sdkNzpwhxpwhxvIMGd3ev++TnTY4f+3d+dxUlT33vg/59TW+6wwDDKCoAiKiAsQIEZUjF4NEfJL1EiM+jzGm5foVXlMotEEExMxuV41iRoNuVHj1WCMQK6KxIgBAVEUWVQQRVkU2YbZp6eXqvP9/VHVPd2zMPvSw/ftq53p6urq0zXN1GfO6sDxZrB1lA0HNpJ2HHGKI2EnsGZPEit2OHAICBjA18YA+f4YEv+y4BwWSG6SCJ5pQNO7NmkcKUL9+zaiW91JqfRCgbxpJrQg18Ywxlh7cJjpMoWESqZXRm3aATjT1MKzsL5yHbbWvo8/f/ZH/L/jf5y1+q/jECrqEyivi0MREPGZnZ78TgoJUzNRETuEoB6BofW/1ZN7klIOHHK8afi95h5yEHdiSKq4G2DgNgEpUo39qIVEXVxg6VbCzkp33oSxgwxcOi6AkLeQWnyqg+rXk0jsBMxCAeOEzocOFSNUv5FE8qD7Wv4TNIQm6BC8BAVjjLUbh5kuIiLEVOMaPKZsfaZYIQSuLPs/uOvD2/FR3Xa8fvhfmF58HgCgLmbjUG0ctbEkApYOq5NT02fyayFUJypQnahAsb+ky8frT1ITxDnkpDvX2iqJhIoj4SThkLtukKMcKBBA2cOYJTQYwoJPl+lmOCLCxn0JLNnagJhNMDXg62MCmDTMzOpIbZVqCI4n1G+2UfeuDT1Pwhzc8Z9X4pBCzRsJqAZ3Fs3wmQZ8I7hZiTHGOo
rDTBcpEBocdwp7S/qyalpaUmwNwuzSb2HR3v/B83sXYWxwPJAIo7w+ARCQFzDRXX09hQD8WgCViXKEzbweXVKhO2WOBHKopc61CThkQ5Fb+0JEgDdNnBQaNCkz1g3S2vyZAEA0obB4axSb9ycBAMfmafj2+CCKgy2Hi8AYDXalO6V39doECr9qtbtZiIjQsN1B3WYbIECLCORNM6Dn8RoUjDHWGRxmuoiIEEuPZGpfWDhn0Pl4u+otfFL/Mf60879xUejfEfIZMLuhNqYpS/ehKl6Jylg5SgLD2tURtaelOtc6cIcmp4JL0kkgoRJuMxA5UORAZa4XBLdzrRRaejRQd8y38lF5Es++V4+aOEEKYMYoH84d6YN2hFQphEBkkoHK2gTsSkL1mgQKzjMh9COXRSUIteuTiH/uNitZx0qEJxqQRj/4wTDGWI7iMNNFBEqPZGqt829TtgP8W/538Pv6u/Fp/AN8HtqE8frkHitjUA+hOlmBiJOPgB7qsddJyexc6yg73T8laceRoCSSThwOvLCinMaRQALQhO6GFaFByu4JK61JOoRlHzVgzW53jqBBQYlvjw+iLK99/yyELpD3ZRMVr8RhVxJq1icRmWK0OrdPslKhZm0STh0BEgidpsN/vMZzATHGWBdxmOkigkLca2ZqaY6ZrH0zliIQ8UJMi1yE12v+F/+ofBaj/GMR1CI9UkZDM9Bg16Midgj+YLDLF8/GzrXeKKB051p3FeZ051pyoFRm51rhhhShQxcGNGlBtnPYcnf7vNrGX7bU42C9W0My9VgLF5/oh9nBjrda0B15VPWvBOJ7FKIFDoJjm/+zavjURu0GG3AAGQDyppkwirhZiTHGugOHmS5SBDS0o2YmtRRBZTQJXbhLEZwVuBDbG97FgeTnWF7xLP6/Qd/rsXIGjRBqE1WoM6sRNvOPuG/j6B87o3OtjYSKtdq5FmicwVZCgy5M+KQGoWv9omkrRRFh5acxvLIjBoeAsCVw6bggxgzq/Ggvc7BE+Awdte/YqN9sQ88T6flhyCbUbrAR2+kOuzZLJSJfMiCtfnRSGGMsx3GY6SKizNl/m4eZ9FIEtXHEEw4ivsalCDRo+HrRVfjj/gX4IPoOxkUn4sTAhB4ppyZ1SKnhcKwcPi0AgkrXrNjkuCOB0p1rHRBs2CrVuTY1EqhznWv7i4qog79sqceuKjdYjCsx8M2TAwia3TBy7HgdyUpC7BMHNeuSKDhfAAKoWZuEXeUOpQqO0xE4iZuVGGOsu3GY6SIildFnJruZqelSBC0tDFlqHYupka9ibc1yvFTxDIb7RsMnAz1S1oAeRk28ArvrdrhNQF7n2lSRUp1rNWjQpB8+bWAsZkhEeGdvAn/fFkXcASwNmHVSAGcMNbs1WIRP1+FUKyTLCVWrkqAEgZKAsIC8KQbMPl5VljHGBioOM11EQLPZfzu6FMFX8i7Gh9GNOGwfwD8rn8fMoit7pKxSCITMPChyYPRw59r+oi6h8PwHUbx/wB1yfVyBjstPCaAw0P3BQmheh+B/xKHqvUUiiwUiU01ogYF9nhljrC/lThtBP6VA6RWzfdKHaMLB55UN2FsVhRQC+QGjzTWVDGnia16A2Vi3Bp82bOux8upSh6lZ0GXXpuDPBdsOJfFfa2rw/oEkNAFcNNqP708K9UiQSZE+gbyvmNALBAJjNeSfy0GGMcZ6GoeZLiJSSHhhhhwTu8vrURezkec34Tfbf9Ec7jsBE8PTAQAvVvxP+pis4xI24fkP6vGnDXWoSxBKQhI3TgnjnJG+XglwRoFE4QUWQqcavNo1Y4z1Ag4zXUbpZqZYXIOUAhG/3qk1lc7Nn408rRBVdjn+VfX3bi7n0WFPlY0H36jBm58lAABnDbdw05QIjolwiypjjA1UHGa6SIFQb7thxq/5EOhAbUxTlvTh4qI5AIC3al/D5/FPu6WMRwNHEf65owEPv1WLQ1GFPEvguokhfH1sAAYv2sgYYwMah5kuqq
pPoC7pTpoXMro+Cul4/ziMD34JAOGFw3+GTckuH3OgO1Tv4JG3avHKjhgUAROGGJj35QhOKDq6VgpnjLGjFYeZLqhpSGJfdRQ2uf1bzHauzdSWrxZ8C0EZxqHkPqyufrlbjjkQERHW7YnjgTdqsKfagU8XuGJ8EHMmhBAw+KPNGGNHC/6N3wUJWyGpbNhw+2dYonvCTEAL4d8Kvw0AWFv9Mg4kPu+W4w4ktXGFx9+tx+KtUSQdYFShjv83LYLThpp9XTTGGGO9jMNMFymlkEjXzFjddtyxgdMxxj8BCgovHP4zFDndduxc9/6BBP5rTQ22HUpCl8DMMX5cNzGEfD9/nBlj7GjEv/27iEDpMGOJIy802RFCCPxb4RXwyQC+SOzGmzWvdtuxc1XMJjz3fj2e3FiP+iShNKzhpikRfGVE7wy5Zowx1j9xmOkih5LpTrpWN/WZSQnrefhqwTcBACurX8Dh5IFuPX4u2VVp44G1NVj/eQICwPTjLPzHlDCGhHmJAMYYO9pxmOmiOMXS33dnM1PKqcGpGOkbC5uSeOHwUyBSnT4WEcGuVXDqFEhRN5ay5ziKsPyjBjzyVi0qGhTyfRL/PimEi08MQOcJ6RhjjCHHwsy9994LIQRuvvnmvi5KWhzusGwJDbro/qHAQgh8reg7MISFPfGPsaFudaePpaKAtCSkT0LVAWT370BzoM7BQ2/WYsWnMRCA04eamDctglGFPOSaMcZYo5wJM2+//TYee+wxjB8/vq+LkiVBUQDd38SUKV8vxrn5swAAr1YuRrVd0eFjqBhBaAJGkYQ5WELLF1AN7vb+hoiwdncMD75Rg89rHAQMge9MCOLb44PwG1wbwxhjLFtOhJm6ujrMmTMHCxcuREFBQV8XJ02RQhLeSKZuGpbdmonh6RhmjUKCYnip4mkQtT+EqASBbMAoFND8AkJ3Q40xWAIQsGv7T7NTdUzhj+/UYem2BtgKGF2kY960CE4dwkOuGWOMtSwnwszcuXNx8cUXY8aMGW3uG4/HUVNTk3XrKUQKCa/PjNUD/WUySSExs+hKaNCxo+F9vF+/vl3PI5ug4oBeKKCFGn/cQgjoYQmzREIPSjh1gEr2baDZsj+B+9fW4KPDNnQJzBrrx7VnhpDny4mPKWOMsT7S768SixYtwrvvvosFCxa0a/8FCxYgLy8vfSsrK+uxshEI8W6e/fdIBhml+Er+xQCA5ZXPot45clAjRXCigJ4noOe1/KOWloAxSMIoEqA44ESpQ7U+3aEhSVi0pR5PbapHNEkYFtFwy9QIpg33QfCQa8YYY23o12Hms88+w0033YSnn34aPl/7wsLtt9+O6urq9O2zzz7rsfK5zUxuzUxPNzOlTI1cgBJjGBpUPZZXPNvqfkQEp56ghwSMAnnEUCA0AaNAg1kiIXQBp45ATu8Emk8rknhgbQ02fOEOuT5vpA9zvxTG4BAPuWaMMdY+el8X4Eg2bNiAgwcP4vTTT09vcxwHr7/+Oh566CHE43FoWvZFz7IsWFbPNvmkKCIkUxPm9ULNDABoQsPXi67CH/cvwAfRdzAuOhEnBiY0L1s9QfNJ6EUSop2rRmtBCWES7ErAriFIH0GaPVMzYivC8o8b8PrOOAhAoV/i8vFBHFfQrz+SjDHG+qF+feU477zz8N5772Vtu+aaazBmzBj86Ec/ahZkehuRQgKp2X97J8wAQKl1LKZEzscbNf/ASxXPYLhvNHyyccVup4EgdAmjSEJ2cPSPNASMYglhKjhVBCdJkAF0a3PP/loHz2ypx75ad4mGScNMzBwTgE/nJiXGGGMd16/DTDgcxrhx47K2BYNBFBUVNdveFxQaa2Z6o89MprPzvobt0U04bB/APyufx8yiK90yJQhQgDFYQPo6Fw6EFDDyNUiLYFcoOLUELUAQXQwbigird8Xx8kcNcAgIGgLfHBfAuBIeqcQYG1jcvoeE7P+7fRKbbkXG9qytlPG8zMfI/a7ZY9S4V9ZjWc
dBK6/nPUZNjpk+BkBITdqafewquw4qdiyAL/XGqW1Rvw4z/Z9C0ps0zxS907SVYkgTXyu6Ek8euA8b69bg5MCZOM4YAxUHjCIBLdj17lCaX0CWSCSrFOxqgjSo0wGpqkHh2ffqsaPCBgCMHWTgW+MCCFv9utsW6+dsSiKh3D8omv+yb/JrvcULg1vD2vKFwf3l3fKFocljzV6v+WONly3lHbv5hSQ1w3dLF67UhaTNx1p6PaCVi2X2MVt7LPNC3OL+HTnfrT3W4jlp/prNQ0Hz95/5M3CP3fLPp2k50o9Rk/edeowyjtmsfNmPHW1Orj4DN2FOn71+zoWZlStX9nUR0khQYzNTL9fMAMBw3wmYGJ6Ot2tX4sXD/4Nrwz9BoNDX6silzkjNSSMtgl3pLoegBQVEB5YS2PhFAku2RtFgEwwN+PqYACYPM3mkEmsTEaFe1aAyWY5K+xCq7MOotA+h0i5HlV2OGqcKR+OFgx0NBETW/0X6d6aAzH5MCO/7jMdExvPaeExAAKKF18t6zablaDxyMulgkDG0185MS3IuzPQrGhonzeuDMAMA5+bPxvboZlQ55Vjt/C++VnB5t4cEd04aAWkSkhWAU0eQfmqzP040qbDkgyg27XcX4jw2T8Pl44MYFOSRSqxRQsVRZZej0s4OLKltqYVc2y93LgLCG1Da6mMie0v6O5HxvKzjNzlm1vts/nqZJW+5jKKV/Zu/x6bvs+m5yzzfzR7L2J5+TDT5WaXPd5NjHuk9Zr2fjMeyytHy+W75M9KOx1r6jGS8ZnvPXy7Zf6gSwwqL+rQMHGa6QlA6zPRmB+BMlvTh3/xz8GzdQ1hf+xomRCehLDSqR15LWgLmYLfZyakmODZB+lruHPzx4SSe3VKP6jhBCuC8UT6cN9IHjReHPOooUqhxKhsDS7IxqFTZ5ahXtUd8voBARCtEvl6EAn0QCoxiFOjFyNfdrz4ZbHZxYIwdXTjMdIUGJJEA0Hc1M06UcELwFEwwpmBT5Tos2fkkrj/5J9BlzyzGKDQBo1BCswjJCoJTq6AFkR7+nXQIL3/UgNW73ZBXHJD49vggjs3nj9pA1uDUp8NJ069V9mEoOEd8vk8GUKAXo0AflA4tqbCSpxdCE/z5YYy1jn9DdIHURMbaTBaIAEUONNk7zSgqThAAjGKJi8zL8HHt+zgY+wKr9i3Decdc0mOvK4SAFhLunDQVgF1PkBZhX8zBX7bU40Cd2xlvSpmFr53oh8lDrnOeQ7bX/NM0qLjfx1T0iM+X0LyQ0iSwGKnalcARn88YY0fCYaaLMifNizlRNNj1CBkRmFrPjm5SSQIlAGOQhBYQCCCEmcPnYNEnj2LVvmU4ueB0DAn03FIOACBNAWOwBFU7eO29GP65OwaHgJApcOkpQYwd1DO1Q6z7uR1ta92+KsnmtSvVTiXa6mgb0iLp2pR8fRAK9CLvazHCWj6k4JFrjLGewWGmi1LNTJb0Q5GDsJmPqF0HAQlD65mLOTkEFQP0AgEt3FjrcXLBGTgp/zRsrdqIJTufxHUn3Q5N9GwtUUVU4em36vDJQXfI9UmFOr55SgBhP3fy7W+SKpHRybY8o9Ot+32SEkd8viHMrL4q+VlNQkUwe3ixVcYYaw2HmS6gjOUMTGFBKQVLWghYQZTH9iMoItBl955iUgSnHtAjAkZ+9ppLQgjMHD4Hn9Zux97oLryx/584q/TCbn39dDmI8PancfxtfT3iSYKlC3zjjABOyzfcFbjjBGlx81JvUqRQ61R54eRQVu1KZbIc9aqtFeQF8rSCjLBSjAJjUPr7oAxzB1vGWL/EYaYLkiqZnqzJkj7EnRh0aaLYNxgOKVTEDyJs5HVbH5rU4pFaUMIolC3O9RI28/FvZZdiya4nsGLv3zG24DQU+0q65fVT6mIKf32zDpv3uH/JHzdIx3e+HEZxWHPL6CfYlQpOnYIMiA7NScOOLKaibkjx5l3Jbg5qf0fb/IwalgKvKYg72jLGch
X/5uqCmN3Y6dEUFuKIQZc6hJAY7C+Fgo3qeAXCZvf0F1BRQFrumktHWlrg9OJp2FKxHp/UbMXSXU/i/5x4a7f1V9i2N4Fn3qhFTYM75PqiCQGcd7IfUjbOpaBHBKQpkKxQUHUE2Q1LIRwtHLJRbVdkhJTswNLejraNQSUVWtymIL8W7KV3whhjvYfDTBdEvTBjwAJIgABIb1ImTWoY7DvGrfpPVHuBpvMXdBUjCAkYhbLNlayFEJg14rv43fvzsav2I7xz6HVMGjy9068NAAmb8PcN9VizPQYAKMnTcOWXwygravkjJH3enDTV7pw0Qu/8UggDCREhqmqz+qpkzm5b41Skp05vTVBGUGAUI18rbjLnyiDuaMsYOypxmOmCmOOuy2RJKz3QI/NCYmgGSvzHwFEKdYkqhM0CdCbPqCSB7MaRS+1RYBXj/GGz8dKeRfjHZ3/D6LzxyLcKO/7iAPaUJ/HUmjocrHGbML4yxoeZpwfbHHIt9Ow5aTqzFEKuICI4sJFQcSQohriKodquaNLJ9hAq7cPpflat0YWRbvrJ6r/ifeWOtowxlo3DTBc02KlFJn0gBQgAssnoIVOzMCQ4DPvqdqM2WYWImd+h1yCHoBq8xSNDHQsBkwefiy2H38Zn9Z/ghd3/g++ccGOHOnA6ivDq+w1YvjkKRUCeX+KKaSGMGdr+Va7Tc9IYBLsSsNu5FEJPcigVOryb931SZd6PIUmJ9PcJFc++T3EkVcLb373fVo1KI4GIlt88sBhu7Qp3tGWMsY7hMNMFDV4zkyl8UI6CkKLFKn6f5sOQ4DDsrd+N+mQtgka4XccnRVD1gJ4noOfJDl/gpJCYfdxVePiDn2N79RZsrngLE4rat0R7ea2Dp9bUYtchd8j1acNNfOtLIQQ7ucq1tASMQRLCUrCrCE6SIP0tL4WQoshBghJIqFgrQSLWGEpUHMkm4SS9rUlwaauTbFcZwoQpLIT1/KyZbDNntNUFz8HDGGPdhcNMFzQ2M3lhRtOa1cyk+PUghgTKsC+6C1G7DgE9dMRjExGcOoIekjAKWh651B6D/UNxztCv4dW9S7FszyIcHzkJISNyxNdd93EcS96pQ8IGfIbANycHceZxVqvBQ5Fyw4WKI+HEkFAJ72scCccLE95jcRVHnOKIRWNIVMeQlHEkkcgKIKlakY4vMNgxGnSY0oIpLJjSguF9NUUr26QFU/hgCguGNN3vvceM9P4m91lhjLFexmGmC9LNTJo7x4wUAhKtD8MOGWGU+MrwRcNuxOwG+HR/q/uqKCB9EnrhkUcutcdZQy7E+xUbsL/hM7y4+xnMHD4HcRVDwnGbVuJe4KiJNeCNHTX4ojoKkZ9AScjG8aUKu5HExzvcIJJ+Tjq4uLUkPUlCwpReiBBmY6hoEiRaCyCGyAwjjdt6ekJBxhhjvYPDTBekwoxP80M5BAENWht/lUesfDhwsD+6B9IWMPXmC1SqGLkLOha1PXKpPTSp4xvHXY1Ht/4S71e+g/cr32l9Zx/g84oUBbClqv2vIyDcwKBZbvjQrIz7GV+lBVNzw4mWMKA3WDBgwefPDByNNSAadO5DwhhjrFUcZrogNc+MqfmgoGBCQrTjr/18swhKOTgQ2wvhaFnLHqiEO3LJHCyg+bvvAj40OBznHDMTK/b+3S2z9JpKpA8NcQP1DQZImbB0C6MGBZHv8zcJIT6Y0nRDiLRgaRYM7zHLCy66MDoVOpwGb5K9KEELgOekYYwx1iEcZrqgIdVnRvOBHAVNmu0aei0EUOgbBIfsrGUPyCaoeGrkUvf3uzhn6EycNeRCSKFBColdh5J4ak0tymsVBIBzTvLjotMCMLTeDROaX0AaEskqb04ag+ekYYwx1n4cZrog1cxk6T6QrTo0FbwQAoP8Q6BI4XD8IEJaBIhq0PPdkUs9RZcGHEVYtrker7zXACIgPyDxnS+HcMKQ9g+57m5C95rVLIJdObDnpGGMMda9OMx0QaqZydJ8IEHQqW
PDbYWQGOQvhU1JVFVUIVIQgVHQuaaa9jpQbeOpNXX47LA75PqM4yx8c3IQAbPvR+AIIaCHBaRJSFYATj+Yk4Yxxlj/x2GmC1LLGVi6DxAKWifmDtGkhkKnBCpCqAtVwpLFEOjaxVsRobaBUFnvoKJOoSLj6479SSQdIGAKfGtyCKcf1/9mk5VWxlIIVQTHJkjfkeekYYwxdvTiMNMF6XlmdB8gAaiOX2yTURuGaWL44OPweZJQFS9HgTXoiBduRxGqowoV9QqVdQ4q6hUqvK+VdQ4q6xVs1fprji41MGdaCPmB/js0WWgCRkHjUghOrYIWdLczxhhjmTjMdEFWnxkJSOpYU40dd0AKCA0JwAoZONYehV01H6G84TCEU4DDdQ4qM2pWUjUtVVEF1cbM+UK4fWEKgxIFIQ2FQYnCkIZBEQ0jB+tdWvSytwgh3H4zqaUQagnSR90yXJ0xxtjAwWGmk2xHobKhDoDbZwY6tTvMxG2FAzUxHKxqQK0hUHmoEgdr4zhUG8P+mjiqogCh8ojH0CRQEJQoCGooDEkUZnwtCEnkByS0AdJ5VpoZSyFUekshBLjZ6WhHDkEl3IVYhXADPIQAZOr7xlvmfe5UztjAw2Gmk9Z9ehgVDfWQOvCPjQ5OiggMKnCrS6JJB+XRBA7VJ1AedW+Hoo3f18TtNo+va0BBECgKGSj0alYKghJFITesRPwyJ2pXuouQAka+5nUOVtzslIOIvBvcZTPc2kV3eU6lvGU6ye3zRUjt5+4L7zEiBSQEYAOQBFgCMiBBRCBHASQABUAR4LifDUECIC/LkIAguIEHbsgREF4YEpCa+1V4gUhoLQUj/swx1t9wmOmkupgNKeMAgG17JbbuAl7Q9kCTnyOabHshQ78uMTjiw+CIhZKwD4PClns/bGFw2IIjqrCnfgcMqbW5jtPRRAsISFMiWeE1O5kEafHFpTs1Bg43SLj3KTuEeMFEITuEZBwFSHVkJ/d7KYUbBpARJIQbJGTGVykFpAA0KaAJL3wkAGUDmi6g5QuYIQEjoMHwuzWQirxARORlGYLjKJACbIegHIJtExyH4CjAsRUch0AKcIhADsGxFchxF3glG+nvG0+KcN+K994EiXSNUPq9SAASkBKQ0ntcNr53qQlINJ4axlj34DDTSReMK8EP33XXJJpyfAhbdwFVDQQ4bpAJmRqKAyYGBUwUe7dBQRN5JDGkwIfBZRHo5pE64A6GgsLuuo8hIeHTAz3/pnKE0N1mJ+nzZg6uU5CBo29Omj4JHRDQZcuhQ3o3CLcZVHjj8tzV5L0LO5DeR3rHgwC01IOpUjkEO0Fw4gQhBfR8wAxLGAENuq/7ftak3PORqg1SRFDKPXekCLZDIAdQDsEhBaUAZROUghuMbAVHEeykG5icpDv6TimCowhQqZDlhSRyX5MEuW83dfpTN+98CYl07ZD0ms4gAV3K9MScAo3nO7ULhyR2tOIw00nRZDT9/cUTwrjwBMDZPxTBPB+KAyZ8evOgkqhPQmoS4dJgG0HGVewrgaNsfBb9FFJoMLX+N4y6rwghoEcEpCmQrFRQdQQZoH65FELXQ0fqq0D66kcZzSHIDh0QgNZC6JBCQJfC7VidChTSDRTu80W6NgFoDBiZoUPCq33oifOk3PBiJ9w+MJolEBiswQx2b4DJJKR7rrRuTgHKcWuBlOP+fB1HedvcMOPWCqVqihTIBhzbrRFK2gSlFJTjBiFbEVTC7fTvUGPn/9Rnh7wwo9JtaWgWkNxaIZnRtNb4M82sGYP3WUjf55DEcgSHmU6qT9YDAIR3KdB1iWF5AVghs8VfunbMgYBAcJAfuq99Q6KFEBgcGAqbktgX3YOIKIQhOz6XzUAmfd6cNN5SCNCoW9e06jACEo5CNOGk7w+00NGdSBGcBMGOuzUVmiUQKNZgeAFG5mifKKk1LXvHpkEg5TaBpWt0FIBUbVA6CAHKUV4NEUC2WyuUaj4juPeJ3CYzRyk4qT5Jyq2Ncr
xsbAuVriRSIJBwQxLQGJKy+gpl5GshRPOQ5H3epUzNmuV+trP2zc0fLeunOMx0Ur3thhlDmiAQdE2DpmkgRc3CjJNUcBIKoRI/zFDHwogUEkODx0KRg/0Ne5FvFkGX/GPLJDQBo7BxTpq+WApBERCLO4g7CqYuUBQyEbL0dEgB3L9whRdY0qEkR0JHdyJya2CcuFsLpVsCgSINRii3A0x3SjU1effS29vz24PICzMKbo1fK8GIiKBstxnNtgGVdIORUl7IVJTe31EAUh2zldsvyWuVhAPl3Xe3O15YIrg1Sen6xVQNJdBY80iUkWpS1Uzeu85smkRGTVFmKMrovC3S/646d85Zbuv3V8UFCxZg8eLF+PDDD+H3+zF16lT86le/woknntin5Uo1MxnSgiIF0zABmdFh0KMcBTtqw1/sg5XXubWPpNAwNDgCNtk43HAA+b5iaO1YnftoIoSAFhIQJsGuAOxeWgohaRMakg4UEQKmhuKIHyGfDks/yhJKG5oGGM0U8BVqMEMSuk9C9sPmwVwlhIDQkFEZ1LFzmw4/qjEYZYWi9M1tCiPvpmyV7hME76tS8JralBdgvCZWAggCJNxAQ25igeO1lCnhNsQ6IDccQXh9mdznpr4SqayQlCpzevhaqvOXl7wEspvPBIekAaPfh5lVq1Zh7ty5mDhxImzbxo9//GN89atfxdatWxEMBvusXHVJd44ZQ5hQ5EDXTUgp3H/IHlKERK0NX4GJQKGvS0M6daljWPA4KFKoiJej0CqGPNr+pG8HaQoYg705aaq8OWn83TuclghoSDqIJxV0TSDi15HnNxC09AEzt093ICIorwmJlNuE5CvUYAQlDD8HmP6qtVqh5pr/QXXEWiHHDSBZtULKHaWm3PavjBDVeIxUjVDji3i1Ol5TWKoyx0FjrbjyAg1J93HlHcshd8Sb7QWj1Ci4VCdwx0tGpFST6QGAxlJkdJpHdkhKB58mISk1jQZ32u45/T7MLF++POv+E088gcGDB2PDhg34yle+0kelauwzk6qZsTQTUpew425fCSJCos6GGdIRLPZ3S5OHqVkoC42Eo2xUJw4j3yzmOS9aIKSAUaBBWt07J42jgGjChu0QfKbEkDwfwj4dfkPjX0geIoJKAnbMHRatmQJWvgYzKKH7JTReNHRA64laIUo1cTWtFbK975vUCjUGJgBe/6F0BQ3c/6VGiyFjTiFCqp+QF4LglcH7HZselUaU7qifGZKUUu5XrwbKSdckEWy3d79bk+QWL93pv0lUyzgZGTVDTTptp0YA8si2Rv0+zDRVXV0NACgsLOzTcihS8OsBmNIHANCkDmlIUIM7IV6y3oZmSQQHByC7scnB0nwoC7vLHlQnKpBvFXXbsQcaLSAgDNm4FEJn5qQhIO4oNCQcSCEQ8unI8+sIWTp0jWvGgMYA48TdZgfNFLAiGswwBxjWMV2qFcoY+n7EWiHljjJrWiuEdF8iASQBQe4t1TdIprsyN4YGIb3OzqlZp2Vj+HBnonZr6xXcUJMKQKn+RZkjHN3pAbJDku2FpFQtkkNeB294Tbep90pevyWgseaqJZRxntHOkW2pTtz9PCTlVJhRSuHmm2/GtGnTMG7cuBb3icfjiMfj6fs1NTU9Upbzh5+PYf6xeGn7myAQNKFB6u4HN9lgQwhv5JLV/X1bAnoQx4ZHYacXaPLMvg12/Zk0BIxiCWF6K3DXKcigaLNGSykglnA79Fq6xKCQhbDfQMDQjroOu61xkgQn5gYYaQiYYa8PjF9C4/WzWC9LXaA7UytE6ealztcKKeWGpnStEDXWCjW+kBt8Uk1PQjaObBSayAhBaD6CDNnHaex/5A3Rp+YhKd23CI37uCHJnR8pNaeSIq/DN1Kj3dyDZM7G3XJIcjttJ460snEvyakwM3fuXLz//vtYs2ZNq/ssWLAAP/vZz3qtTKkPmxQapCahHAUIiVBJAGaw54ZRh4wIjg2Pwq7a7ahLViNk5PXYa+
W69FIIljfJXi1Ba2VOmoTXoZcyOvSGfTpM7tALwAswcbe/gzQEjKDmTWbHAYblrnSg6IZaIVJNgpFqoVbIgTuPkOPOPg1yJ2Ns1lcos3tO6luvRigdeCSgpWqFpMiqFeqwjJDU2Mk6Y4mRdOfr7JDkRxx5gc4NcOkuORNmbrjhBrz44ot4/fXXMWzYsFb3u/322zFv3rz0/ZqaGpSVlfVo2QQENKFBaBKaocFfaMKK9Px8MHlmAY4NHY9dtR8hatfxsgdt0PwC0mick0YYBOkTGR16HRiaRL5fR57fRMDSuEMv3F+ydsz9JSw1Ad0vYEV06H4B3eKQx1h2rVAH+wm1VSvkNHaIVk5qyY3O1woJr31JZNYAyexaISEATbb/37aKGYiYfTsHWr8PM0SEG2+8EUuWLMHKlStx3HHHHXF/y7JgWb03U64iBSkENKFDtyQCRT748s1e65hbYBXDVjYve9BOQhcwiiSkRYiVKzSUJ+FYhIBPw9B8P0KWDl87Zmce6JTtjkJSSQWhSxh+ASusQw8IaGbbzXTdWhZScJQDh7xbk++pWdW3+wdGSx0rhWh5e2tEKxemjr7/Hj9OaxfQVjfn+HFa0dr5zInjCLQ4t2K6T0uLx9ea1PykwpBose8QvD5CyiEgVTOkACiRHk0GeEGq2agtpIOPzKjJSvUL6mv9PszMnTsXzzzzDP7+978jHA5j//79AIC8vDz4/f4+Lh0AEITQoAnNq5Xp/QshL3vQAQTEbIV62BAFQMjSEXR05EUMmP6ju5ZBOQQn5s4aK6RbAxMYZMLwC2hW9wUYIoJDDhQp2Mp2v5INpdyv5FVjp4afSCGhSe/fmNBgSAMBGYBP88HUTehSz7o4HGl0SMubGx/IDEZZ21v5PuvbDj63teNkb+7gMamVsrVj//aEvFaf21qH04znND1+a693pGO18gItl6m143fwdTsSfo+kp1836zjeWl5tlSezVihdO5Q1hL7xq8qoEUotxIqk91yJHp/Tqy39Psz8/ve/BwBMnz49a/vjjz+Oq6++uvcL1AJNSMg+nMSucdkDG19EdyOPlz1oxlGE+riDuG3Db+o4psCPgqCJgK6hocJGw2GFhFIwAr1b69DXlONNZpdIBRjAX2x4TUjtn0VZkWo1nChSUEpl/ZGnS90NKUKDJjX4NT9M04SlWfBpPmhSgy51aML92vR7NrB09ILe4f27EHB7pDwdDGw9fR46XB5v/1StkHIIfsvXoWN0t37/W6HDKb0PCGh9PiOvu+xBGRTZvOxBhnhSoT5hg0AIWTqGFYaQ5zdhGY1/tgQG6dB9CtFDNuLVCmZYDugp9clxm5DSAcYH+Ap1GAGZFWAc5cBxWm7acciBoMYmGyFEOnCkA4rhh6VZsHQLhjRgSCMdSDSpQRd6OpzwBJBHtw436wzcf56sk/hq1w36umYmJbXsgUMOyhv2H7XLHihyJ7drSNgwdQ3FIROFIQsRn4GW+rQJ4c6LolkCDeUOYlXOgOvcSoqQjCkk4w5IKLfzc6GC8BFs00FCKHckXgMgvClVUzUnUkjoUoclLZiaCVM34ZM+6JoOXbjBpKWgcjTVcDHG+haHmW6ga73X4bctutQxLHQcHHKOumUPko5CXdyG7SgELR0jioPID5jwt7NDr25JhEoFNJ9AtNyBSjgwQrLf/GxbkmrecUhBkePOQgqvP4qtoBIEFYc7Y6glYOYJWEEdpl+DpumwNDegWNJy+594tSWpZp70fa+2hTHG+iMOM92gv/VPMaR59Cx74A2rro/b0HSBfL+JopCJiM+A3om1f4QUCBTp0H0S0YNJxGsUzGDvrSPkTnSloOCO4HGDiuOtG2Onh12mqtkFBDTp1p5o3n+6bUEmdRiaAStiIJhnIhD2IRCwYBh6VkgZsJ8LxthRhcNMNzBE304W1JLMZQ+qEodRYBX3dZG6laMIdXEbCdvJ6tAbsvRuaU83gxLaMBPRchuxSge6KaB3crRTs9qTdE
BRcFRqyAC80TsCmpSQUkJCQgqZrj0xpAFTmpBSg+41/2hSg4QGSgJ2jKBBgxnS4M+z4A8asII6NF52gTE2wHGY6SohYMj+F2aAgbnsQSzpIJpwQADCPh3HFgWQ5zc6PUMveeuhKFLp79NThENBFCpIXaG+XEFFCVpQAdKbxAoZz0nN902AOwGDN0cDEYRorD2RcJtrLM2CJS3omp7ub5I5/DjVT0UTWou1J0SEZNxBst6BIsDwaYgMNuAPW7ACOjSesZgxdhThMNNF7l/P/bcvQa4te9A0UBAIjlKIxpNoSNowdYm8gIa8gIGgCUA2oN6Oos6mFoNJqjMrZdZ+AO5smN5SuUIISOH2jXFXn5Xeru73Vp6EbkkkygG7HjCDOgzT/blLKdzGHS+AuKvceseBW8OiS93dR2qQUkKDjs607hAR7LiDRMwBKYLh0xEq8iEQNuELGNAMDjCMsaMTh5kuSk2Y1591ddkDRapxgqUmQYNSj2VuS3+fChWNtRgp3nxo6a+N2xvDgK0I0YQDpYCQZWBooYU8v4WA6X5sUwFCCpkeVeOGBfdrakSNWyMiG0OL9xotbU8FkpYeU2WEqkMNqDkUhaZJWMGe73NCRLATComYDXIIuqUjVOhDIGLCCujQjf792WOMsd7AYaaLNMh+H2aA7GUP4k4sHTiaTFadHS68O0IIb4l4r77Bu4C7WzNqIrz+G1IIt9YCqT4dEjKjyURCus/2jpfalqopiSYU6hMOTENDUaEPpXkBFIYsmFrj81OhozdpukDhkAB8fh0V++tRX5lAIM+A7OY+KakAk4zZcByCbmoI5VkI5LlNSDovt8AYY1k4zHSREBq0HJmcrthXAgBosOsbQ4YXMFIhJRUu3JCSUVuRcT8zfDSGG9GlIeBJR6GmIYmY7SBk6TiuyIdBIR8i/v414kYIgWC+BcOnoXJ/Peoq4vAFdRi+rn8G7ITbhOTYBN2UCORZbg1M0IDBAYYxxlqVG1fhfkz2kwnz2kMIgUH+IX1djCzRhI2aWBICAvkBA6PzwygMmvD18+YT06djUFkYll9H1cEGJBMJ+MNGh4OXnXSQaHDgJBV0U4M/ZCCQZ8EXNGBY/fscMMZYf8FhpotEjjQz9SeOItTGkqhP2vDrGo4p8KMk7EN+wITWzrWA+gOpSeSXBGH6DVTur0d9VRz+sNnmSCIn6faBsZMKuqHBFzIQ9JqQDKvl0UuMMcZax2Gmi1JTvrO2xZIOahqScIgQ8Rs4qSiCopCFoJXbH8NAxITh01C1vx41h2MwfTqsQPZ7cmwvwCQUNF3CChgoKHVHIRk+DjCMMdYVuX0V6Qek0HKmmakvKCLUx92mJFOXKA5bKM3zoSBowhhAk7kZpoaiYWGYAR1VBxoQrU7ACujuXDAJBU0XsPw6CkosWEEDJgcYxhjrNhxmukgTvOJvS1IdeuOOg6Bl4ITBYRSHLUR8/atDb3eSUiCvOADLb6BiXz3iDTZMv47IID/8IQOmT0+vSM0YY6z7cJjpgoAeQNgo6Oti9CvRhI3qhiSkECgIGhidlxsderuTL2igZEQEdlLBtDQOMIwx1sM4zHSBXw8g3xzU18Xoc44i1MSSiCZs+E0NxxYGMDjiQ77fgDxKL+SaLnlJAcYY6yUcZlinxZIOqhuSUF6H3uOK81AUMtMz9DLGGGO9ga86rEMUEepiNuriNkxdYHDEwpA8HwoDJvQB1KGXMcZY7uAww9ol6ShUNySRcByELAPHDw5hUMRC2Bq4HXoZY4zlBg4zrFVE7kKPNbHGDr1DvRl6Lf3o6dDLGGOsf+Mww5pxFKGmIYlo0kHA69BbEvEh7yju0MsYY6z/4jDTRURAXdyGJgQ06d6kQE42vTTr0DsoyB16GWOM9Xt8leoCS9dQGDSQdAgJx4FSBEe5nWQJgABAIIAEhAA0ISClgBSNoafx+74JQooItV6HXp8uUZJnYUjEj4KAwR16GWOM5QQOM12QFzAwcUQhHEWwFW
V9dRRBkXtfKULSUUg6CglbIakUknZqf4W44+7jEEAKUKQgBEDe6wi4Yae1ECRldlBqj1SH3ritEPbpOLEkhKKwhYjP6LkTxhhjjPUADjNdJISArgl0pj+s8sKPosYg5IaaxkCUuiUdhbjtwHaAhOPAcQCbCI7jgBTggKAcQIEgQCC4aUh4NT2a8EKQFGhIOtCkQFHQRGmeHwVBgzv0MsYYy1kcZvqQlAJmJzvUNgs9RI3hyLtvO25QStcIOYSEozAk4sOgsMUdehljjA0IHGZylJQCEgJH0ZJHjDHGWIu4hydjjDHGchqHGcYYY4zlNA4zjDHGGMtpHGYYY4wxltNyJsw8/PDDGDFiBHw+HyZPnoz169f3dZEYY4wx1g/kRJh59tlnMW/ePMyfPx/vvvsuTj31VFxwwQU4ePBgXxeNMcYYY30sJ8LM/fffj+9973u45pprcNJJJ+HRRx9FIBDAn/70p74uGmOMMcb6WL8PM4lEAhs2bMCMGTPS26SUmDFjBtatW9eHJWOMMcZYf9DvJ80rLy+H4zgoKSnJ2l5SUoIPP/yw2f7xeBzxeDx9v6ampsfLyBhjjLG+0+9rZjpqwYIFyMvLS9/Kysr6ukiMMcYY60H9PswUFxdD0zQcOHAga/uBAwcwZMiQZvvffvvtqK6uTt8+++yz3ioqY4wxxvpAvw8zpmnijDPOwIoVK9LblFJYsWIFpkyZ0mx/y7IQiUSybowxxhgbuPp9nxkAmDdvHq666iqceeaZmDRpEh588EHU19fjmmuu6euiMcYYY6yP5USYueyyy3Do0CH89Kc/xf79+zFhwgQsX768WadgxhhjjB19BBFRXxeiJ1VXVyM/Px+fffYZNzkxxhhjOaKmpgZlZWWoqqpCXl7eEffNiZqZrqitrQUAHtXEGGOM5aDa2to2w8yAr5lRSuGLL75AOByGEKLbjptKjFzj0/P4XPcOPs+9g89z7+Dz3Ht66lwTEWprazF06FBIeeTxSgO+ZkZKiWHDhvXY8XnEVO/hc907+Dz3Dj7PvYPPc+/piXPdVo1MSr8fms0YY4wxdiQcZhhjjDGW0zjMdJJlWZg/fz4sy+rrogx4fK57B5/n3sHnuXfwee49/eFcD/gOwIwxxhgb2LhmhjHGGGM5jcMMY4wxxnIahxnGGGOM5TQOM0fw8MMPY8SIEfD5fJg8eTLWr19/xP2fe+45jBkzBj6fD6eccgqWLVvWSyXNbR05zwsXLsRZZ52FgoICFBQUYMaMGW3+XFijjn6mUxYtWgQhBGbNmtWzBRwgOnqeq6qqMHfuXJSWlsKyLIwePZp/f7RDR8/zgw8+iBNPPBF+vx9lZWW45ZZbEIvFeqm0uen111/HzJkzMXToUAghsHTp0jafs3LlSpx++umwLAvHH388nnjiiR4vJ4i1aNGiRWSaJv3pT3+iDz74gL73ve9Rfn4+HThwoMX9165dS5qm0a9//WvaunUr3XnnnWQYBr333nu9XPLc0tHzfMUVV9DDDz9MGzdupG3bttHVV19NeXl59Pnnn/dyyXNPR891ys6dO+mYY46hs846iy655JLeKWwO6+h5jsfjdOaZZ9JFF11Ea9asoZ07d9LKlStp06ZNvVzy3NLR8/z000+TZVn09NNP086dO+kf//gHlZaW0i233NLLJc8ty5YtozvuuIMWL15MAGjJkiVH3P/TTz+lQCBA8+bNo61bt9Lvfvc70jSNli9f3qPl5DDTikmTJtHcuXPT9x3HoaFDh9KCBQta3P/SSy+liy++OGvb5MmT6d///d97tJy5rqPnuSnbtikcDtOTTz7ZU0UcMDpzrm3bpqlTp9If//hHuuqqqzjMtENHz/Pvf/97GjlyJCUSid4q4oDQ0fM8d+5cOvfcc7O2zZs3j6ZNm9aj5RxI2hNmfvjDH9LJJ5+cte2yyy6jCy64oAdLRsTNTC1IJBLYsGEDZsyYkd4mpcSMGTOwbt26Fp+zbt26rP0B4IILLm
h1f9a589xUNBpFMplEYWFhTxVzQOjsuf75z3+OwYMH4//+3//bG8XMeZ05z//7v/+LKVOmYO7cuSgpKcG4ceNwzz33wHGc3ip2zunMeZ46dSo2bNiQbor69NNPsWzZMlx00UW9UuajRV9dCwf82kydUV5eDsdxUFJSkrW9pKQEH374YYvP2b9/f4v779+/v8fKmes6c56b+tGPfoShQ4c2+8fDsnXmXK9Zswb//d//jU2bNvVCCQeGzpznTz/9FK+99hrmzJmDZcuWYceOHbj++uuRTCYxf/783ih2zunMeb7iiitQXl6OL3/5yyAi2LaN73//+/jxj3/cG0U+arR2LaypqUFDQwP8fn+PvC7XzLCcde+992LRokVYsmQJfD5fXxdnQKmtrcWVV16JhQsXori4uK+LM6AppTB48GD84Q9/wBlnnIHLLrsMd9xxBx599NG+LtqAsnLlStxzzz145JFH8O6772Lx4sV46aWXcPfdd/d10Vg34JqZFhQXF0PTNBw4cCBr+4EDBzBkyJAWnzNkyJAO7c86d55T7rvvPtx777149dVXMX78+J4s5oDQ0XP9ySefYNeuXZg5c2Z6m1IKAKDrOrZv345Ro0b1bKFzUGc+06WlpTAMA5qmpbeNHTsW+/fvRyKRgGmaPVrmXNSZ8/yTn/wEV155Ja699loAwCmnnIL6+npcd911uOOOOyAl/23fHVq7FkYikR6rlQG4ZqZFpmnijDPOwIoVK9LblFJYsWIFpkyZ0uJzpkyZkrU/APzzn/9sdX/WufMMAL/+9a9x9913Y/ny5TjzzDN7o6g5r6PnesyYMXjvvfewadOm9O3rX/86zjnnHGzatAllZWW9Wfyc0ZnP9LRp07Bjx450WASAjz76CKWlpRxkWtGZ8xyNRpsFllSAJF7Vp9v02bWwR7sX57BFixaRZVn0xBNP0NatW+m6666j/Px82r9/PxERXXnllXTbbbel91+7di3puk733Xcfbdu2jebPn89Ds9uho+f53nvvJdM06W9/+xvt27cvfautre2rt5AzOnqum+LRTO3T0fO8Z88eCofDdMMNN9D27dvpxRdfpMGDB9MvfvGLvnoLOaGj53n+/PkUDofpL3/5C3366af0yiuv0KhRo+jSSy/tq7eQE2pra2njxo20ceNGAkD3338/bdy4kXbv3k1ERLfddhtdeeWV6f1TQ7N/8IMf0LZt2+jhhx/modl97Xe/+x0de+yxZJomTZo0id588830Y2effTZdddVVWfv/9a9/pdGjR5NpmnTyySfTSy+91Mslzk0dOc/Dhw8nAM1u8+fP7/2C56COfqYzcZhpv46e5zfeeIMmT55MlmXRyJEj6Ze//CXZtt3Lpc49HTnPyWSS7rrrLho1ahT5fD4qKyuj66+/niorK3u/4DnkX//6V4u/c1Pn9qqrrqKzzz672XMmTJhApmnSyJEj6fHHH+/xcvKq2YwxxhjLadxnhjHGGGM5jcMMY4wxxnIahxnGGGOM5TQOM4wxxhjLaRxmGGOMMZbTOMwwxhhjLKdxmGGMMcZYTuMwwxhjjLGcxmGGsQFqxIgRePDBB9u9/xNPPIH8/PwOvcb06dNx8803d+g5ACCEwNKlSzv8PMYYawmHGcZ6ya5duyCEwKZNm7K2X3311Zg1a1aflCnTZZddho8++qhDz1m8eDHuvvvu9P2OBijWOzoTVBnLJXpfF4Ax1j/4/X74/f4OPaewsLCHStOzEokEr0jN2ADCNTOMdaPly5fjy1/+MvLz81FUVISvfe1r+OSTTwAAxx13HADgtNNOgxAC06dPx1133YUnn3wSf//73yGEgBACK1euBAD86Ec/wujRoxEIBDBy5Ej85Cc/QTKZzHq9F154ARMnToTP50NxcTFmz57datn++Mc/Ij8/HytWrGjx8aZ/vd91112YMGECnnrqKYwYMQJ5eXm4/PLLUVtbm94ns5lp+vTp2L17N2655Zb0e2mv+fPno7
S0FFu2bMFDDz2EcePGpR9bunQphBB49NFH09tmzJiBO++8EwDwySef4JJLLkFJSQlCoRAmTpyIV199Nev4I0aMwN13343vfve7iEQiuO666wAACxcuRFlZGQKBAGbPno37778/6xxs3rwZ55xzDsLhMCKRCM444wy88847rb6PqqoqXHvttRg0aBAikQjOPfdcbN68udk5feyxx9Kve+mll6K6ujq9z8qVKzFp0iQEg0Hk5+dj2rRp2L17d5vnsLWyrly5Etdccw2qq6vTP5e77roLABCPx3HrrbfimGOOQTAYxOTJk9OfP6DxM7F06VKccMIJ8Pl8uOCCC/DZZ5+1WR7GehOHGca6UX19PebNm4d33nkHK1asgJQSs2fPhlIK69evBwC8+uqr2LdvHxYvXoxbb70Vl156KS688ELs27cP+/btw9SpUwEA4XAYTzzxBLZu3Yrf/OY3WLhwIR544IH0a7300kuYPXs2LrroImzcuBErVqzApEmTWizXr3/9a9x222145ZVXcN5557X7/XzyySdYunQpXnzxRbz44otYtWoV7r333hb3Xbx4MYYNG4af//zn6ffSFiLCjTfeiD//+c9YvXo1xo8fj7PPPhtbt27FoUOHAACrVq1CcXFx+iKbTCaxbt06TJ8+HQBQV1eHiy66CCtWrMDGjRtx4YUXYubMmdizZ0/Wa91333049dRTsXHjRvzkJz/B2rVr8f3vfx833XQTNm3ahPPPPx+//OUvs54zZ84cDBs2DG+//TY2bNiA2267DYZhtPp+vvWtb+HgwYN4+eWXsWHDBpx++uk477zzUFFRkd5nx44d+Otf/4oXXngBy5cvx8aNG3H99dcDAGzbxqxZs3D22Wdjy5YtWLduHa677rp2BcPWyjp16lQ8+OCDiEQi6Z/LrbfeCgC44YYbsG7dOixatAhbtmzBt771LVx44YX4+OOP08eNRqP45S9/iT//+c9Yu3YtqqqqcPnll7dZHsZ6VY+vy83YUezQoUMEgN577z3auXMnAaCNGzdm7XPVVVfRJZdc0uax/vM//5POOOOM9P0pU6bQnDlzWt1/+PDh9MADD9APf/hDKi0tpffff/+Ix3/88ccpLy8vfX/+/PkUCASopqYmve0HP/gBTZ48OX3/7LPPpptuuqnZa7YFAD333HN0xRVX0NixY+nzzz9PP6aUoqKiInruueeIiGjChAm0YMECGjJkCBERrVmzhgzDoPr6+laPf/LJJ9Pvfve7rHLNmjUra5/LLruMLr744qxtc+bMyToH4XCYnnjiiTbfDxHR6tWrKRKJUCwWy9o+atQoeuyxx4jIPaeapmW935dffpmklLRv3z46fPgwAaCVK1e26zUzHamsTX+2RES7d+8mTdNo7969WdvPO+88uv3229PPA0Bvvvlm+vFt27YRAHrrrbc6XEbGegrXzDDWjT7++GN8+9vfxsiRIxGJRDBixAgAaFZL0B7PPvsspk2bhiFDhiAUCuHOO+/MOs6mTZvarGX5r//6LyxcuBBr1qzBySef3OEyjBgxAuFwOH2/tLQUBw8e7PBxWnLLLbfgrbfewuuvv45jjjkmvV0Iga985StYuXIlqqqqsHXrVlx//fWIx+P48MMPsWrVKkycOBGBQACAWzNz6623YuzYscjPz0coFMK2bduanfMzzzwz6/727dub1WQ1vT9v3jxce+21mDFjBu699950k2FLNm/ejLq6OhQVFSEUCqVvO3fuzHresccem/V+p0yZAqUUtm/fjsLCQlx99dW44IILMHPmTPzmN79pVw1XR8sKAO+99x4cx8Ho0aOzyrtq1aqs5+q6jokTJ6bvjxkzBvn5+di2bVu7ysVYb+Aww1g3mjlzJioqKrBw4UK89dZbeOuttwC4HU47Yt26dZgzZw4uuugivPjii9i4cSPuuOOOrOO0p7PuWWedBcdx8Ne//rVjb8TTtElFCAGlVKeO1dT555+PvXv34h//+Eezx6
ZPn46VK1di9erVOO200xCJRNIBZ9WqVTj77LPT+956661YsmQJ7rnnHqxevRqbNm3CKaec0uycB4PBDpfxrrvuwgcffICLL74Yr732Gk466SQsWbKkxX3r6upQWlqKTZs2Zd22b9+OH/zgB+1+zccffxzr1q3D1KlT8eyzz2L06NF48803u7WsqfJqmoYNGzZklXfbtm34zW9+0+7yMtYfcJhhrJscPnwY27dvx5133onzzjsPY8eORWVlZfrx1OgZx3GynmeaZrNtb7zxBoYPH4477rgDZ555Jk444YRmnUDHjx/famfelEmTJuHll1/GPffcg/vuu68rb69dWnovrfn617+OZ555Btdeey0WLVqU9Viq38xzzz2X7hszffp0vPrqq1i7dm16GwCsXbsWV199NWbPno1TTjkFQ4YMwa5du9p8/RNPPBFvv/121ram9wFg9OjRuOWWW/DKK6/gG9/4Bh5//PEWj3f66adj//790HUdxx9/fNatuLg4vd+ePXvwxRdfpO+/+eabkFLixBNPTG877bTTcPvtt+ONN97AuHHj8Mwzz7T5fo5U1pZ+Lqeddhocx8HBgweblXfIkCHp/Wzbzur0vH37dlRVVWHs2LHtKhNjvYHDDGPdpKCgAEVFRfjDH/6AHTt24LXXXsO8efPSjw8ePBh+vx/Lly/HgQMH0iNYRowYgS1btmD79u0oLy9HMpnECSecgD179mDRokX45JNP8Nvf/rbZX9nz58/HX/7yF8yfPx/btm3De++9h1/96lfNyjV16lQsW7YMP/vZz7LmgHnooYc61Bm4PUaMGIHXX38de/fuRXl5OQBg7969GDNmTLoDdKbZs2fjqaeewjXXXIO//e1v6e3jx49HQUEBnnnmmawws3TpUsTjcUybNi297wknnIDFixdj06ZN2Lx5M6644op21R7deOONWLZsGe6//358/PHHeOyxx/Dyyy+nO9s2NDTghhtuwMqVK7F7926sXbsWb7/9dvoi3vR9zZgxA1OmTMGsWbPwyiuvYNeuXXjjjTdwxx13ZIUBn8+Hq666Cps3b8bq1avxH//xH7j00ksxZMgQ7Ny5E7fffjvWrVuH3bt345VXXsHHH3/cZnBoq6wjRoxAXV0dVqxYgfLyckSjUYwePRpz5szBd7/7XSxevBg7d+7E+vXrsWDBArz00kvpYxuGgRtvvBFvvfUWNmzYgKuvvhpf+tKXWu1szlif6OtOO4wNJP/85z9p7NixZFkWjR8/nlauXEkAaMmSJUREtHDhQiorKyMpJZ199tlERHTw4EE6//zzKRQKEQD617/+RURuZ9uioiIKhUJ02WWX0QMPPNCsE+fzzz9PEyZMINM0qbi4mL7xjW+kH2vaGXfVqlUUDAbpt7/9LRG5nVGHDx+efrylDsCnnnpq1us98MADWc9p2gF43bp1NH78eLIsi1K/XlIdn1Pvi4iyzgkR0bPPPks+n4+ef/759LZLLrmEdF2n2tpaIiJyHIcKCgroS1/6UlaZdu7cSeeccw75/X4qKyujhx56qN0dk//whz/QMcccQ36/n2bNmkW/+MUv0h2N4/E4XX755VRWVkamadLQoUPphhtuoIaGhlbfV01NDd144400dOhQMgyDysrKaM6cObRnz56sc/rII4/Q0KFDyefz0Te/+U2qqKggIqL9+/fTrFmzqLS0lEzTpOHDh9NPf/pTchynWdkztVVWIqLvf//7VFRURABo/vz5RESUSCTopz/9KY0YMYIMw6DS0lKaPXs2bdmyhYgaPxPPP/88jRw5kizLohkzZtDu3buPWB7GepsgIurDLMUYY/3G9773PXz44YdYvXp1jxz/rrvuwtKlS5vNAt1fPfHEE7j55ptRVVXV10Vh7Ih4BmDG2FHrvvvuw/nnn49gMIiXX34ZTz75JB555JG+LhZjrIO4zwxj7Ki1fv16nH/++TjllFPw6KOP4re//S2uvfbavi5Wi04++eSsIdSZt6
effrqvi8dYn+JmJsYYywG7d+9utpxFSklJSdZ8QIwdbTjMMMYYYyyncTMTY4wxxnIahxnGGGOM5TQOM4wxxhjLaRxmGGOMMZbTOMwwxhhjLKdxmGGMMcZYTuMwwxhjjLGcxmGGMcYYYznt/wdHNvb7zPQ4NwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.lineplot(\n", - " data=df,\n", - " y=\"adv_fit_time\",\n", - " x=\"attack.init.kwargs.eps_step\",\n", - " hue=\"model.init.kwargs.kernel\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.lineplot(\n", - " data=df,\n", - " y=\"adv_fit_time\",\n", - " x=\"attack.init.kwargs.batch_size\",\n", - " hue=\"model.init.kwargs.kernel\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/gzip/.gitignore b/examples/gzip/.gitignore index 67e77e0e..14be55ba 100644 --- a/examples/gzip/.gitignore +++ b/examples/gzip/.gitignore @@ -7,9 +7,11 @@ kdd_nsl 2-22/* 2-28/* 3-7/* +7-29/* gzip/* ddos/* kdd_nsl/* sms_spam/* truthseeker/* conf/*/best_*.yaml +/params.yaml diff --git a/examples/gzip/batchMixin.py b/examples/gzip/batchMixin.py index 5cc762b7..d21098a4 100644 --- a/examples/gzip/batchMixin.py +++ b/examples/gzip/batchMixin.py @@ -4,12 +4,9 @@ from sklearn.datasets import make_classification -import random - -# from gzip_classifier import GzipSVC, GzipKNN, GzipLogisticRegressor -from sklearn.svm import SVC +from pathlib import Path +from time import time from sklearn.model_selection import train_test_split -import plotext logger = logging.getLogger(__name__) @@ -25,38 +22,96 @@ def __init__( nb_epoch=1, **kwargs, ): - self.batch_size = kwargs.pop("m", batch_size) + self.batch_size = kwargs.pop("batch_size", batch_size) self.max_batches = kwargs.pop("max_batches", max_batches) + self.training_log = kwargs.pop("training_log", None) nb_epoch = kwargs.pop("nb_epoch", nb_epoch) if not nb_epoch >= 1: nb_epoch = 1 self.nb_epoch = nb_epoch - if "m" in kwargs: - logger.warning( - f"Parameter 'm' is being overwritten with batch_size={self.batch_size}.", - ) - kwargs["m"] = self.batch_size 
super().__init__(**kwargs) - self.predict = self.batched_predict(self.predict) if hasattr(self, "_find_best_samples"): self._find_best_samples = self.batched_find_best_samples( self._find_best_samples, ) - if hasattr(self, "score"): - self.score = self.batched_score(self.score) self.fit = self.batched_fit(self.fit) - self.predict = self.batched_predict(self.predict) if self.nb_epoch > 1: self.fit = self.epoch_fit(self.fit) - # self.score = self.batched_score(self.score) def epoch_fit(self, fit_func): def wrapper(*args, **kwargs): X, y = args - for i in range(self.nb_epoch): - random.shuffle(X) - random.shuffle(y) + X_test = kwargs.pop("X_test", None) + y_test = kwargs.pop("y_test", None) + log_file = self.training_log if hasattr(self, "training_log") else None + for i in tqdm(range(self.nb_epoch), desc="Epochs", leave=True, position=0): + # Shuffle the indices of X,y + indices = np.arange(len(X)) + np.random.shuffle(indices) + X = X[indices] + y = y[indices] + logger.debug(f"Epoch {i + 1}/{self.nb_epoch}") fit_func(X, y, **kwargs) + if hasattr(self, "score"): + score = self.score(X, y) + train_scores.append(score) + if X_test is not None: + assert len(X_test) == len( + y_test, + ), "X_test and y_test must have the same length" + test_score = self.score(X_test, y_test) + test_scores.append(test_score) + logger.info(f"Train score: {score}, Test score: {test_score}") + else: + logger.info(f"Train score: {score}") + if log_file is not None: + if Path(log_file).exists(): + if i == 0: + # rotate the log file by appending a timestamp before the extension + rotated_log_name = log_file.replace( + ".csv", + f"_{int(time())}.csv", + ) + # rename the log file + Path(log_file).rename(rotated_log_name) + with open(log_file, "w") as f: + f.write("epoch, train_score,") + if "test_score" in locals(): + f.write(",test_score") + f.write("\n") + f.write(f"{i+1},") + f.write(f"{score},") + if "test_score" in locals(): + f.write(f" {test_score},") + f.write("\n") + else: + with 
open(log_file, "a") as f: + # assuming csv format + f.write(f"{i+1},") + f.write(f"{score},") + if "test_score" in locals(): + f.write(f"{test_score},") + f.write("\n") + else: + with open(log_file, "w") as f: + f.write("epoch, train_score,") + if "test_score" in locals(): + f.write(" test_score,") + f.write("\n") + f.write(f"{i+1},") + f.write(f"{score},") + if "test_score" in locals(): + f.write(f"{test_score},") + f.write("\n") + import plotext as plt + + plt.plot(train_scores, label="Train score") + if X_test is not None: + plt.plot(test_scores, label="Test score") + plt.xlabel("Epochs") + plt.ylabel("Accuracy") + plt.title("Scores") + plt.show() return wrapper @@ -72,28 +127,16 @@ def wrapper(*args, **kwargs): n_batches = self.max_batches for i in tqdm( range(n_batches), - desc="Fitting batches", total=n_batches, + desc="Fitting batches", leave=False, - dynamic_ncols=True, + position=1, ): start = i * self.batch_size end = (i + 1) * self.batch_size X_batch = X_train[start:end] y_batch = y_train[start:end] - print( - f"Shape of X_batch is {X_batch.shape} and shape of y_batch is {y_batch.shape}", - ) fit_func(X_batch, y_batch, **kwargs) - if self.nb_epoch > 1: - continue - train_score = self.score(X_batch, y_batch) - test_score = self.score(X_train, y_train) - print( - f"Batch {i+1} of {n_batches} - Train score: {np.mean(train_score)}; Test score: {np.mean(test_score)}", - ) - train_scores.append(train_score) - test_scores.append(test_score) return wrapper @@ -120,8 +163,6 @@ def wrapper(method, **kwargs): new_X = X[i * self.batch_size : (i + 1) * self.batch_size] # noqa new_y = y[i * self.batch_size : (i + 1) * self.batch_size] # noqa indices = func(X=new_X, y=new_y, method=method, n_jobs=n_jobs) - # print("After finding best samples") - # print(f"Length of indices is {len(indices)}") X = X[indices] y = y[indices] self.X_ = X @@ -133,75 +174,6 @@ def wrapper(method, **kwargs): return wrapper - def batched_predict(self, predict_func): - def wrapper(*args, 
**kwargs): - X_test = args[0] - n = len(X_test) - n_batches = n // self.batch_size - if n_batches > self.max_batches: - n_batches = self.max_batches - elif n_batches == 0: - n_batches = 1 - preds = [] - for i in tqdm( - range(n_batches), - desc="Predicting batches", - total=n_batches, - leave=False, - dynamic_ncols=True, - ): - start = i * self.batch_size - end = (i + 1) * self.batch_size - X_batch = X_test[start:end] - new_preds = predict_func(X_batch, **kwargs) - preds.append(new_preds) - return np.concatenate(preds) - - return wrapper - - def batched_score(self, score_func): - def wrapper(*args, **kwargs): - X_test, y_test = args - n = len(X_test) - n_batches = n // self.batch_size - if n_batches > self.max_batches: - n_batches = self.max_batches - elif n_batches == 0: - n_batches = 1 - scores = [] - for i in tqdm( - range(n_batches), - desc="Scoring batches", - total=n_batches, - leave=False, - dynamic_ncols=True, - ): - start = i * self.batch_size - end = (i + 1) * self.batch_size - X_batch = X_test[start:end] - y_batch = y_test[start:end] - score = score_func(X_batch, y_batch, **kwargs) - scores.append(score) - return scores - - return wrapper - - -def create_batched_class(cls, *args, **kwargs): - name = cls.__name__ - - class BatchedClass(cls, BatchedMixin): - def __init__(self, *args, **kwargs): - self.max_batches = kwargs.pop("max_batches", 100) - self.batch_size = kwargs.pop("batch_size", 10) - super().__init__(*args, **kwargs) - - batched_class = BatchedClass() - combined_name = f"Batched{name}" - batched_class.__name__ = combined_name - batched_class.__init__(*args, **kwargs) - return batched_class - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -236,20 +208,3 @@ def __init__(self, *args, **kwargs): test_size=0.2, random_state=42, ) - - class BatchedSVC(BatchedMixin, SVC): - pass - - clf = BatchedSVC(max_batches=100, batch_size=100, kernel="rbf") - clf.fit(X_train, y_train) - score = clf.score(X_test, y_test) - print(score) - 
input("Press enter to continue") - score = round(np.mean(score), 2) - std = round(np.std(score), 3) - logger.info(f"Final Score: {score}") - logger.info(f"Standard Deviation: {std}") - # if plotext_available is True: - plotext.scatter(train_scores, label="Train scores") - plotext.scatter(test_scores, label="Test scores") - plotext.plot() diff --git a/examples/gzip/conf/clean.yaml b/examples/gzip/conf/clean.yaml index c5bc3dd5..0d329632 100644 --- a/examples/gzip/conf/clean.yaml +++ b/examples/gzip/conf/clean.yaml @@ -1,14 +1,3 @@ -# params: - # control: - # data.sample.train_size: 100 - # defaults: - # model.init.m : -1 -# fillna: -# model.init.compressor : "None" -# model.init.metric : "ncd" -# model.init.method : "random" -# model.init.m : ${data.sample.random_state} -# model.init.precompute : "False" replace: model.init.metric: jaro: "Jaro" @@ -18,11 +7,11 @@ replace: ratio: "Ratio" seqRatio: "SeqRatio" hamming: "Hamming" - gzip: "Gzip" + gzip: "GZIP" pkl: "Pickle" bz2: "BZ2" - zstd: "Zstd" - lzma : "Lzma" + zstd: "ZSTD" + lzma : "LZMA" model_name: GzipSVC : "k-SVC" GzipLogisticRegressor : "k-Logistic" @@ -30,6 +19,29 @@ replace: model.init.symmetric: True: "Symmetric" False: "Asymmetric" + model.init.sampling_method: + random : "Random" + medoid : "Medoid" + sum : "Sum" + svc : "SVC" + hardness : "Hardness" + nearmiss : "NearMiss" + knn : "KNN" + dataset: + ddos : "DDoS" + sms_spam : "SMS Spam" + kdd_nsl : "KDD NSL" + truthseeker : "Truthseeker" + model.init.m : + -1 : 1 drop_values: accuracy : 0.00000000000 predict_time : 1.00000000000 +replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + diff --git a/examples/gzip/conf/condense_knn.yaml b/examples/gzip/conf/condense_knn.yaml index 52bd92be..82b73c54 100644 --- a/examples/gzip/conf/condense_knn.yaml +++ b/examples/gzip/conf/condense_knn.yaml @@ -44,7 +44,7 @@ hydra: 
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} + study_name: ${dataset}_${model_name}_condense n_trials: 2 n_jobs: 2 max_failure_rate: 1.0 @@ -52,8 +52,7 @@ hydra: model.init.k : 1,3,5,7,11 +model.init.weights : uniform,distance +model.init.algorithm : brute - model.init.symmetric : True,False - ++model.init.precompute : True + model.init.symmetric : True model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name : ${model_name} data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 diff --git a/examples/gzip/conf/condense_logistic.yaml b/examples/gzip/conf/condense_logistic.yaml index 5a585b06..9bb99fbd 100644 --- a/examples/gzip/conf/condense_logistic.yaml +++ b/examples/gzip/conf/condense_logistic.yaml @@ -42,7 +42,7 @@ hydra: n_ei_candidates: 24 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} + study_name: ${dataset}_${model_name}_condense storage: sqlite:///optuna.db n_jobs: 1 n_trials : 1 @@ -53,8 +53,7 @@ hydra: +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 +model.init.fit_intercept : True,False +model.init.class_weight : balanced,None - model.init.symmetric : True,False - ++model.init.precompute : True + model.init.symmetric : True model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name : ${model_name} data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 diff --git a/examples/gzip/conf/condense_svc.yaml b/examples/gzip/conf/condense_svc.yaml index 478c9c97..6f1d3adf 100644 --- a/examples/gzip/conf/condense_svc.yaml +++ b/examples/gzip/conf/condense_svc.yaml @@ -44,7 +44,7 @@ hydra: n_ei_candidates: 24 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ??? 
+ study_name: ${dataset}_${model_name}_condense storage: sqlite:///optuna.db n_jobs: 2 n_trials : 2 @@ -53,8 +53,8 @@ hydra: +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 +model.init.gamma : scale,auto +model.init.class_weight : balanced,null - ++model.init.precompute : True model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model.init.symmetric : True model_name : ${model_name} data.sample.random_state: 0,1,2,3,4,5,6,7,8,9 model.init.m: tag(log, interval(.1, 1)) diff --git a/examples/gzip/conf/condensed_plots.yaml b/examples/gzip/conf/condensed_plots.yaml index 268802a3..de1d9e92 100644 --- a/examples/gzip/conf/condensed_plots.yaml +++ b/examples/gzip/conf/condensed_plots.yaml @@ -1,61 +1,88 @@ -line_plot: - - file : sampling_method_vs_accuracy.pdf - hue: model.init.sampling_method - title: #"Accuracy vs Sampling Method" - x : model.init.m - xlabel: Percentage of Samples per Class +cat_plot: + - file : condensing_method_vs_accuracy.pdf + digitize : Condensing Ratio + x: Condensing Method + hue : Condensing Ratio y : accuracy - ylabel: Accuracy - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim : [0, 1] y_scale : linear - legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - - file: sampling_method_vs_train_time.pdf - hue: model.init.sampling_method - title: #"Training Time vs Sampling Method" - x : model.init.m - xlabel: Percentage of Samples per Class + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + kind : boxen + col : Model + rotation : 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xlabels: "Condensing Method" + ylabels: "Accuracy" + legend_title : "Sample Ratio" + - file: condensing_method_vs_train_time.pdf + x: Condensing 
Method + hue : Condensing Ratio + digitize : Condensing Ratio y : train_time - ylabel: Training Time (s) - y_scale : linear - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim : [0, 1] - legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - - file : sampling_method_vs_predict_time.pdf - hue: model.init.sampling_method - title: #"Prediction Time vs Sampling Method" - x : model.init.m - xlabel: Percentage of Samples per Class + y_scale : log + kind : boxen + col : Model + rotation : 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: "Condensing Method" + ylabels: "Training Time" + legend_title : "Sample Ratio" + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + - file : condensing_method_vs_predict_time.pdf + x: Condensing Method + hue : Condensing Ratio + digitize : Condensing Ratio y : predict_time - ylabel: Prediction Time (s) y_scale : log - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim : [0, 1] - legend: {"title": "Sampling Method", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + col : Model + rotation : 45 + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + kind : boxen + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: "Condensing Method" + ylabels: "Prediction Time" + legend_title : "Sample Ratio" diff --git a/examples/gzip/conf/gzip_knn.yaml b/examples/gzip/conf/gzip_knn.yaml index da8b7ca5..fc9f0b73 100644 --- a/examples/gzip/conf/gzip_knn.yaml +++ b/examples/gzip/conf/gzip_knn.yaml @@ 
-33,30 +33,26 @@ hydra: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k : 1,3,5,7,11 +model.init.weights : uniform,distance +model.init.algorithm : brute - model.init.symmetric : True,False - ++model.init.precompute : True - model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name : ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 diff --git a/examples/gzip/conf/gzip_logistic.yaml b/examples/gzip/conf/gzip_logistic.yaml index 3636c201..e7d9f4d0 100644 --- a/examples/gzip/conf/gzip_logistic.yaml +++ b/examples/gzip/conf/gzip_logistic.yaml @@ -33,31 +33,28 @@ hydra: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials : 1 + n_trials: 128 + n_jobs: 8 params: +model.init.solver: saga - +model.init.penalty : l2,l1,l2,none - +model.init.tol : 1e-4,1e-3,1e-2 - +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.penalty : l2,l1 + +model.init.tol: tag(log, interval(1e-5, 
1e-1)) + +model.init.C : tag(log, interval(1e-3, 1e3)) +model.init.fit_intercept : True,False +model.init.class_weight : balanced,None - model.init.symmetric : True,False - ++model.init.precompute : True - model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model_name : ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: diff --git a/examples/gzip/conf/gzip_svc.yaml b/examples/gzip/conf/gzip_svc.yaml index 42212998..4c20c962 100644 --- a/examples/gzip/conf/gzip_svc.yaml +++ b/examples/gzip/conf/gzip_svc.yaml @@ -35,29 +35,25 @@ hydra: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials : 2 + n_trials: 128 + n_jobs: 8 params: +model.init.kernel : rbf,precomputed - +model.init.C : 1e-2,1e-1,1e0,1e1,1e2 + +model.init.C : tag(log, interval(1e-3, 1e3)) +model.init.gamma : scale,auto +model.init.class_weight : balanced,null - model.init.symmetric : True,False - ++model.init.precompute : True - model.init.metric : gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name : ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: diff --git a/examples/gzip/conf/merged_plots.yaml b/examples/gzip/conf/merged_plots.yaml new file mode 100644 index 00000000..5226c4bd --- /dev/null +++ b/examples/gzip/conf/merged_plots.yaml @@ -0,0 +1,372 @@ +cat_plot: + - file: models_vs_accuracy.pdf + x : Model + y : accuracy + hue : data.sample.train_size + 
errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : Accuracy + legend_title: "Samples" + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + col : Dataset + order: + - k-KNN + - k-SVC + - k-Logistic + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: models_vs_train_time.pdf + x : Model + y : train_time + hue : data.sample.train_size + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_t$ (s) + legend_title: "Samples" + rotation: 90 + col : Dataset + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + y_scale : log + order: + - k-KNN + - k-SVC + - k-Logistic + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: models_vs_predict_time.pdf + x : Model + y : predict_time_per_sample + hue : data.sample.train_size + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_i$ (s) + legend_title: "Samples" + col : Dataset + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + y_scale : log + order: + - k-KNN + - k-SVC + - k-Logistic + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: symmetric_models_vs_accuracy.pdf + row : Model + x : data.sample.train_size + y : accuracy + hue : Symmetric + errorbar: se + kind : boxen + titles : + xlabels : "Samples" + ylabels : Accuracy + legend_title: " " + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + col : Dataset + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: symmetric_models_vs_train_time.pdf + row : Model + x : data.sample.train_size + y : train_time_per_sample + hue : Symmetric + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_t$ (s) + legend_title: " " + rotation: 90 + col : Dataset + legend: {"bbox_to_anchor": [1.05, .5], "loc" : 
"center left", "prop" : {"size" : 14}} + y_scale : log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: symmetric_models_vs_predict_time.pdf + x : data.sample.train_size + row : Model + y : predict_time_per_sample + hue : Symmetric + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_i$ (s) + legend_title: " " + col : Dataset + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + y_scale : log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: condensing_methods_vs_accuracy.pdf + x : Model + y : accuracy + hue : Condensing Method + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : Accuracy + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + col : Dataset + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - k-SVC + - k-Logistic + legend_title: "Condensing Method" + - file: condensing_methods_vs_train_time.pdf + x : Model + y : train_time + hue : Condensing Method + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_t$ (s) + legend_title: "Condensing Method" + rotation: 90 + col : Dataset + y_scale : log + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - k-SVC + - k-Logistic + - file: condensing_methods_vs_predict_time.pdf + x : Model + y : predict_time_per_sample + hue : Condensing Method + errorbar: se + kind : boxen + titles : + xlabels : " " + ylabels : $t_i$ (s) + legend_title: "Condensing Method" + col : Dataset + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + rotation: 90 + y_scale : log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - 
k-SVC + - k-Logistic +line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: #"Accuracy vs $m$-best samples across datasets and compressors" + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: [10, 500] + style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + - file: string_metric_vs_accuracy.pdf + hue : Metric + title: #"Accuracy vs $m$-best samples across datasets and string metrics" + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + - file: string_metric_vs_train_time.pdf + hue : Metric + title: #"Accuracy vs $m$-best samples across datasets and string metrics" + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: $t_t$ (s) + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + y_scale: log + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: #"Training Time vs $m$-best samples across datasets and compressors" + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: $t_t$ (s) + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: [10, 500] + 
style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + y_scale: log + - file: string_metric_vs_predict_time.pdf + hue : Metric + title: #"Accuracy vs $m$-best samples across datasets and string metrics" + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time_per_sample + ylabel: $t_i$ (s) + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + y_scale: log + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: #"Prediction Time vs $m$-best samples across datasets and compressors" + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time_per_sample + ylabel: $t_i$ (s) + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: [10, 500] + style: Dataset + style_order: + - "DDoS" + - "SMS Spam" + - "KDD NSL" + - "Truthseeker" + legend : + bbox_to_anchor : [1.05, .5] + loc: center left + prop: {"size" : 12} + y_scale: log diff --git a/examples/gzip/conf/plots.yaml b/examples/gzip/conf/plots.yaml index eac757c4..188f8e2f 100644 --- a/examples/gzip/conf/plots.yaml +++ b/examples/gzip/conf/plots.yaml @@ -1,17 +1,57 @@ line_plot: +- file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: #"Accuracy vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - file: metric_vs_accuracy.pdf - hue: model.init.metric + hue: 
Metric title: #"Accuracy vs $m$-best samples" x: data.sample.train_size xlabel: Number of Training Samples y: accuracy ylabel: Accuracy hue_order: - - Gzip + - GZIP - Pickle - BZ2 - - Zstd - - Lzma + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: string_metric_vs_accuracy.pdf + hue: Metric + title: #"Accuracy vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + # - GZIP + # - Pickle + # - BZ2 + # - ZSTD + # - LZMA - Levenshtein - Ratio - Hamming @@ -23,7 +63,31 @@ line_plot: xlim: [10, 500] legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - file: metric_vs_train_time.pdf - hue: model.init.metric + hue: Metric + title: #"Training Time vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: compressor_metric_vs_train_time.pdf + hue: Metric title: #"Training Time vs $m$-best samples" x: data.sample.train_size xlabel: Number of Training Samples @@ -31,11 +95,29 @@ line_plot: ylabel: Training Time (s) y_scale: linear hue_order: - - Gzip + - GZIP - Pickle - BZ2 - - Zstd - - Lzma + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: [10, 500] + legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: string_metric_vs_train_time.pdf + hue: Metric + title: #"Training Time vs 
$m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + # - GZIP + # - Pickle + # - BZ2 + # - ZSTD + # - LZMA - Levenshtein - Ratio - Hamming @@ -46,8 +128,22 @@ line_plot: err_style: bars xlim: [10, 500] legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} +- file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: #"Prediction Time vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA - file: metric_vs_predict_time.pdf - hue: model.init.metric + hue: Metric title: #"Prediction Time vs $m$-best samples" x: data.sample.train_size xlabel: Number of Training Samples @@ -55,11 +151,26 @@ line_plot: ylabel: Prediction Time (s) y_scale: linear hue_order: - - Gzip + - GZIP - Pickle - BZ2 - - Zstd - - Lzma + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio +- file: string_metric_vs_predict_time.pdf + hue: Metric + title: #"Prediction Time vs $m$-best samples" + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: - Levenshtein - Ratio - Hamming @@ -71,99 +182,166 @@ line_plot: xlim: [10, 500] legend: {"title": "Metrics", "bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} cat_plot: - - file: symmetric_vs_metric.pdf - x : model.init.symmetric + - file: symmetric_vs_compressor_metric.pdf + x : Metric y : accuracy - hue : model.init.metric + hue : Symmetric errorbar: se - kind : bar - titles : - xlabels : "" + kind : boxen + titles : " " + xlabels : "Compressor" ylabels : Accuracy legend_title: "Metrics" - hue_order: - - Gzip + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order : + - 
Asymmetric + - Symmetric + # - Levenshtein + # - Ratio + # - Hamming + # - Jaro + # - Jaro-Winkler + # - SeqRatio + rotation: 90 + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + - file: symmetric_vs_string_metric.pdf + x : Metric + y : accuracy + hue : Symmetric + errorbar: se + kind : boxen + titles : " " + xlabels : "Compressors" + ylabels : Accuracy + legend_title: " " + order: + # - GZIP + # - Pickle + # - BZ2 + # - ZSTD + # - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order : + - Asymmetric + - Symmetric + rotation: 90 + legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} + - file: symmetric_vs_metric.pdf + x : Metric + y : accuracy + hue : Symmetric + errorbar: se + kind : boxen + titles : " " + xlabels : "Compressors" + ylabels : Accuracy + legend_title: " " + order: + - GZIP - Pickle - BZ2 - - Zstd - - Lzma + - ZSTD + - LZMA - Levenshtein - Ratio - Hamming - Jaro - Jaro-Winkler - SeqRatio + hue_order : + - Asymmetric + - Symmetric + rotation: 90 legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - set: - yscale: linear - ylim: [0, 1] - file: symmetric_vs_metric_train_time.pdf - x : model.init.symmetric + x : Metric y : train_time - hue : model.init.metric + hue : Symmetric errorbar: se - kind : bar + kind : boxen titles : - xlabels : "" + xlabels : "Metrics" ylabels : Training Time (s) legend_title: "Metrics" - hue_order: - - Gzip + order: + - GZIP - Pickle - BZ2 - - Zstd - - Lzma + - ZSTD + - LZMA - Levenshtein - Ratio - Hamming - Jaro - Jaro-Winkler - SeqRatio + hue_order : + - Asymmetric + - Symmetric + rotation : 90 legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - set: - yscale: log - - file: models_vs_accuracy.pdf - x : model_name - y : accuracy - hue : data.sample.train_size + y_scale : linear + - file: symmetric_vs_string_metric_train_time.pdf + x : Metric + y 
: train_time + hue : Symmetric errorbar: se kind : boxen - titles : - xlabels : Model - ylabels : Accuracy - legend_title: "Samples" - - legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - set: - yscale: linear - ylim: [0, 1] - rotation: 90 - - file: models_vs_train_time.pdf - x : model_name - y : accuracy - hue : data.sample.train_size - errorbar: se - kind : bar - titles : - xlabels : Model + titles : + xlabels : "Compressors" ylabels : Training Time (s) - legend_title: "Samples" - rotation: 90 + legend_title: "String Metrics" + order: + # - GZIP + # - Pickle + # - BZ2 + # - ZSTD + # - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order : + - Asymmetric + - Symmetric + rotation : 90 legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - set: - yscale: log - - file: models_vs_predict_time.pdf - x : model_name - y : accuracy - hue : data.sample.train_size + - file: symmetric_vs_compressor_metric_train_time.pdf + x : Metric + y : train_time + hue : Symmetric errorbar: se - kind : bar - titles : - xlabels : Model - ylabels : Prediction Time (s) - legend_title: "Samples" - + kind : boxen + titles : + xlabels : "Compressors" + ylabels : Training Time (s) + legend_title: "Metrics" + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + # - Levenshtein + # - Ratio + # - Hamming + # - Jaro + # - Jaro-Winkler + # - SeqRatio + hue_order : + - Asymmetric + - Symmetric + rotation : 90 legend: {"bbox_to_anchor": [1.05, .5], "loc" : "center left", "prop" : {"size" : 14}} - set: - yscale: log - rotation: 90 diff --git a/examples/gzip/dvc.lock b/examples/gzip/dvc.lock index a02a4b1d..afeed250 100644 --- a/examples/gzip/dvc.lock +++ b/examples/gzip/dvc.lock @@ -1,15521 +1,5601 @@ schema: '2.0' stages: - train: - cmd: python -m deckard.layers.experiment train + clean@sms_spam-gzip_knn: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_knn.csv -o + 
sms_spam/plots/clean/gzip_knn.csv -c conf/clean.yaml deps: - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - - path: raw_data/ + - path: sms_spam/reports/gzip_knn.csv hash: md5 - md5: 33d46673e0631bef98be9e8991ed1ed1.dir - size: 50328647 - nfiles: 8 + md5: 2cc3444a2175ce059be641e3c97a3958 + size: 1219660 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + 
model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/reports/train/default/predictions.json - hash: md5 - md5: 986d2f0abe9b96253b196a222a550609 - size: 702 - - path: kdd_nsl/reports/train/default/score_dict.json + - path: sms_spam/plots/clean/gzip_knn.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - test_each_method@knn-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/knn - ++raise_exception=True ' + md5: 788afe513b0596808b5125d82019c3ae + size: 704722 + clean@sms_spam-gzip_svc: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_svc.csv -o + sms_spam/plots/clean/gzip_svc.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + - path: sms_spam/reports/gzip_svc.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: c4196fa3f0dbc4a27972b967e7104485 + size: 1327853 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: 
deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: 
kdd_nsl/logs/method/knn + - path: sms_spam/plots/clean/gzip_svc.csv hash: md5 - md5: f902bdd8882aa06bba0d1fef19c4a313.dir - size: 11613 - nfiles: 4 - - path: kdd_nsl/reports/train/knn/score_dict.json - hash: md5 - md5: 4e7f0750779df5202e5dec6228f94f99 - size: 490 - test_each_method@knn-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=10 files.name=knn - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - hydra.run.dir=truthseeker/logs/method/knn ++raise_exception=True ' + md5: 75d1640476b0bfb25b015190f8b4d3ed + size: 1077730 + clean@sms_spam-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_logistic.csv + -o sms_spam/plots/clean/gzip_logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + - path: sms_spam/reports/gzip_logistic.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: 0b87e1a278e97393093edfa85a6c3647 + size: 1324676 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - 
train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: truthseeker/logs/method/knn + - path: sms_spam/plots/clean/gzip_logistic.csv hash: md5 - md5: 5a52da2681ff444c53a1623722c2d431.dir - size: 11642 - nfiles: 4 - - path: truthseeker/reports/train/knn/score_dict.json - hash: md5 - md5: f09f746efa5c7a56f4dd1a3e20a7ab6b - size: 485 - test_each_method@svc-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/svc - ++raise_exception=True ' + md5: 
66fb493c5dac4d615c1047e8c4432846 + size: 954789 + clean@sms_spam-condense/knn: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/knn.csv + -o sms_spam/plots/clean/condense/knn.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + - path: sms_spam/reports/condense/knn.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: 905472e105c51a514aa316767bce543e + size: 1313303 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + 
model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/method/svc + - path: sms_spam/plots/clean/condense/knn.csv hash: md5 - md5: 433b30d37ba64e71527ac2d837b44fa2.dir - size: 11612 - nfiles: 4 - - path: kdd_nsl/reports/train/svc/score_dict.json - hash: md5 - md5: f41538adb6ffa9182ea126c85c353abf - size: 489 - test_each_method@svc-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=10 files.name=svc - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - hydra.run.dir=truthseeker/logs/method/svc ++raise_exception=True ' + md5: ca86373d57bc8ef7b33d53d4113d5b17 + size: 859047 + clean@sms_spam-condense/svc: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/svc.csv + -o sms_spam/plots/clean/condense/svc.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + - path: sms_spam/reports/condense/svc.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: 63204fb6e188d4166e415c86e305631d + size: 1399188 params: - params.yaml: - data: - _target_: 
deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker 
+ model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: truthseeker/logs/method/svc - hash: md5 - md5: bc37655235ef0d2919a62c85456d379c.dir - size: 11645 - nfiles: 4 - - path: truthseeker/reports/train/svc/score_dict.json + - path: sms_spam/plots/clean/condense/svc.csv hash: md5 - md5: 97f1fed3ee2887773ca9a50eeeb5b1ed - size: 488 - test_each_method@medoid-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/medoid - ++raise_exception=True ' + md5: c91f0d6cc570e6ea8fe093ba67ea5da8 + size: 1142139 + clean@sms_spam-condense/logistic: + cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/logistic.csv + -o sms_spam/plots/clean/condense/logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + - path: sms_spam/reports/condense/logistic.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: 5d331b32fbe15e0cdc7611fc3aa946a2 + size: 3983718 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: 
deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/method/medoid - hash: md5 - md5: 5b972c1f6a8c4ebff94a088e2be12b28.dir - size: 11661 - nfiles: 4 - - path: kdd_nsl/reports/train/medoid/score_dict.json + - path: sms_spam/plots/clean/condense/logistic.csv hash: md5 - md5: 10a0913632dea0d6717263ba1854b1e2 - size: 484 - test_each_method@medoid-truthseeker: - 
cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=10 files.name=medoid - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=medoid - hydra.run.dir=truthseeker/logs/method/medoid ++raise_exception=True ' + md5: 6d5bc96d209d77fefaf76e73109b26ac + size: 2257621 + merge@sms_spam: + cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/gzip_knn.csv + --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder + sms_spam/plots --output_file merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: sms_spam/plots/clean/gzip_knn.csv hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: 
deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/medoid - hash: md5 - md5: 7b6fef8487e5b8dec0f76f4b4fc59ccb.dir - size: 10226 - nfiles: 4 - - path: truthseeker/reports/train/medoid/score_dict.json - hash: md5 - md5: 8cebb3ee0098d2ee2bb4130e346e8e0f - size: 282 - test_each_method@sum-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/sum - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json + md5: 788afe513b0596808b5125d82019c3ae + size: 704722 + - path: sms_spam/plots/clean/gzip_logistic.csv hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + md5: 66fb493c5dac4d615c1047e8c4432846 + size: 954789 + - path: sms_spam/plots/clean/gzip_svc.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: 
deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + md5: 75d1640476b0bfb25b015190f8b4d3ed + size: 1077730 outs: - - path: kdd_nsl/logs/method/sum - hash: md5 - md5: 41cd7632a1d85e7380d14b0e8eccc819.dir - size: 11607 - nfiles: 4 - - path: kdd_nsl/reports/train/sum/score_dict.json + - path: sms_spam/plots/merged.csv hash: md5 - md5: 2a97e468ea2e9071e1f7d5bdb1e7495b - size: 484 - test_each_method@sum-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=10 files.name=sum - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=sum - hydra.run.dir=truthseeker/logs/method/sum ++raise_exception=True ' + md5: 4baf51fdcc220aedc6443147a057559e + size: 2765074 + merge_condense@sms_spam: + cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + sms_spam/plots/ --output_file condensed_merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: sms_spam/plots/clean/condense/knn.csv hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml + md5: ca86373d57bc8ef7b33d53d4113d5b17 + size: 859047 + - path: sms_spam/plots/clean/condense/logistic.csv hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - 
dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/sum + md5: 6d5bc96d209d77fefaf76e73109b26ac + size: 2257621 + - path: sms_spam/plots/clean/condense/svc.csv hash: md5 - md5: e7f9741f777d98f3d3416264b9f3e6b2.dir - size: 10164 - nfiles: 4 - - path: truthseeker/reports/train/sum/score_dict.json + md5: c91f0d6cc570e6ea8fe093ba67ea5da8 + size: 1142139 + outs: + - path: sms_spam/plots/condensed_merged.csv hash: md5 - md5: d49a3cbdeb348bbf9ad3b59e9e8e0e32 - size: 283 - test_each_method@random-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn hydra.run.dir=kdd_nsl/logs/method/random - ++raise_exception=True ' + md5: aff0ab5439e406220d4c0c95d7032f71 + size: 4293513 + plot@sms_spam: + cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/merged.csv -c + conf/plots.yaml deps: - - path: 
kdd_nsl/reports/train/default/score_dict.json + - path: conf/plots.yaml hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml + md5: 43e3ec0876b55c83f231615f7a904e33 + size: 7386 + - path: sms_spam/plots/merged.csv hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 + md5: 4baf51fdcc220aedc6443147a057559e + size: 2765074 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/method/random - hash: md5 - md5: 723e8c93428a09edb21943a20fca5c3c.dir - size: 11639 - nfiles: 4 - - path: kdd_nsl/reports/train/random/score_dict.json - hash: md5 - md5: ed402e68904e8888b8ba6b0bebf6fa05 - size: 488 - 
test_each_method@random-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=10 files.name=random - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - hydra.run.dir=truthseeker/logs/method/random ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: f8a4019adc566855c2a704a0311ff7c4 - size: 489 - - path: params.yaml - hash: md5 - md5: f6a5538a55c3c37d8a2d6d1d4eb95ec2 - size: 1467 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/random - hash: md5 - md5: 
f785fe50b4007a169c37e6e9cb856268.dir - size: 11670 - nfiles: 4 - - path: truthseeker/reports/train/random/score_dict.json - hash: md5 - md5: 8bfb4b2efa55e9944cec7331401762f9 - size: 485 - prepare_distance_matrices@0-10-kdd_nsl: - cmd: python -m deckard.layers.optimise files.name=0-10 stage=train data=kdd_nsl - dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=10 dataset=kdd_nsl - files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-10.npz - model.init.method=random model.init.m=100 ++raise_exception=True - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 3332d80113acf55f8e69e46aea82a1cc - size: 412 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: - https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: - https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv - sample: - 
_target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: kdd_nsl/model/gzip_classifier/gzip/0-100.npz - k: 1 - m: -1 - method: - name: gzip_classifier.GzipClassifier - library: sklearn - model_name: gzip_classifier - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/model/gzip_classifier/gzip/0-10.npz - hash: md5 - md5: 1b745ff8dbc88f247f3245d9efd6de7e - size: 208 - - path: kdd_nsl/reports/train/0-10/score_dict.json - hash: md5 - md5: cae521db2dcda14d0d3ed880c26adf62 - size: 233 - prepare_distance_matrices@0-100-kdd_nsl: - cmd: python -m deckard.layers.optimise files.name=0-100 stage=train data=kdd_nsl - dataset=kdd_nsl data.sample.random_state=0 data.sample.train_size=100 dataset=kdd_nsl - files.directory=kdd_nsl model_name=gzip_classifier model=gzip_classifier model.init.distance_matrix=kdd_nsl/model/gzip_classifier/gzip/0-100.npz - model.init.method=random model.init.m=100 ++raise_exception=True - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 222b4b55b1b16639ce30218bf60c1f32 - size: 412 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: - https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - 
stratify: true - test_size: 100 - train_size: 100 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - data: - cmd: python data_prep.py - deps: - - path: data_prep.py - hash: md5 - md5: 18244c921ed2d7cbf25b8362b3ca33aa - size: 5146 - outs: - - path: raw_data/ - hash: md5 - md5: 33d46673e0631bef98be9e8991ed1ed1.dir - size: 50328647 - nfiles: 8 - test_symmetric_methods@true-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_true files.directory=kdd_nsl data=kdd_nsl - dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=true hydra.run.dir=kdd_nsl/logs/symmetric/true - model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - 
device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/model/gzip_knn/None/symmetric_true.npz - hash: md5 - md5: 1b745ff8dbc88f247f3245d9efd6de7e - size: 208 - - path: kdd_nsl/reports/train/symmetric_true/score_dict.json - hash: md5 - md5: bb10a010ac3f8790cdbe4310288efc63 - size: 432 - test_symmetric_methods@true-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_true files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=gzip_knn model.init.symmetric=true hydra.run.dir=truthseeker/logs/symmetric/true - 
model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_true.npz - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - 
model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/model/gzip_knn/None/symmetric_true.npz - hash: md5 - md5: f71a2727e708fdfb7867a6983f3aa8cf - size: 223 - - path: truthseeker/reports/train/symmetric_true/score_dict.json - hash: md5 - md5: 6d7a4eb01733e4e2fda1c40b5562646c - size: 434 - test_symmetric_methods@true-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_true files.directory=sms_spam data=sms_spam - dataset=sms_spam model_name=gzip_knn model.init.symmetric=true hydra.run.dir=sms_spam/logs/symmetric/true - model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: 
predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/model/gzip_knn/None/symmetric_true.npz - hash: md5 - md5: 1b745ff8dbc88f247f3245d9efd6de7e - size: 208 - - path: sms_spam/reports/train/symmetric_true/score_dict.json - hash: md5 - md5: 0b8d690ffca7173942d490a2f0cbeec4 - size: 432 - test_symmetric_methods@true-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_true files.directory=ddos data=ddos dataset=ddos - model_name=gzip_knn model.init.symmetric=true hydra.run.dir=ddos/logs/symmetric/true - model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_true.npz ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 
4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - 
name: sklearn.metrics.log_loss - outs: - - path: ddos/model/gzip_knn/None/symmetric_true.npz - hash: md5 - md5: 1b745ff8dbc88f247f3245d9efd6de7e - size: 208 - - path: ddos/reports/train/symmetric_true/score_dict.json - hash: md5 - md5: 2c12176f8bf7355f284e059b2527cf44 - size: 418 - test_symmetric_methods@false-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_false files.directory=kdd_nsl data=kdd_nsl - dataset=kdd_nsl model_name=gzip_knn model.init.symmetric=false hydra.run.dir=kdd_nsl/logs/symmetric/false - model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: 
raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/model/gzip_knn/None/symmetric_false.npz - hash: md5 - md5: 9a9fcf9ba5dbc34eb2ca1f203088fc47 - size: 740 - - path: kdd_nsl/reports/train/symmetric_false/score_dict.json - hash: md5 - md5: 8ae56e642565330a37e731472a6c2d76 - size: 429 - test_symmetric_methods@false-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_false files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=gzip_knn model.init.symmetric=false hydra.run.dir=truthseeker/logs/symmetric/false - model.init.distance_matrix=truthseeker/model/gzip_knn/None/symmetric_false.npz - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: 
raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/model/gzip_knn/None/symmetric_false.npz - hash: md5 - md5: b02cc76ddfb10d1e0e63e0f6e05cdaae - size: 1791 - - path: 
truthseeker/reports/train/symmetric_false/score_dict.json - hash: md5 - md5: 4ef36cb0b198d778dc8e0e6ff282d778 - size: 433 - test_symmetric_methods@false-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_false files.directory=sms_spam data=sms_spam - dataset=sms_spam model_name=gzip_knn model.init.symmetric=false hydra.run.dir=sms_spam/logs/symmetric/false - model.init.distance_matrix=sms_spam/model/gzip_knn/None/symmetric_false.npz - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - 
sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/model/gzip_knn/None/symmetric_false.npz - hash: md5 - md5: ac71e5af3607731b783a490caf81c37f - size: 694 - - path: sms_spam/reports/train/symmetric_false/score_dict.json - hash: md5 - md5: 66d92f0ed630b08fbddb1a9c07f13981 - size: 432 - test_symmetric_methods@false-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.method=random - model.init.m=10 files.name=symmetric_false files.directory=ddos data=ddos dataset=ddos - model_name=gzip_knn model.init.symmetric=false hydra.run.dir=ddos/logs/symmetric/false - model.init.distance_matrix=ddos/model/gzip_knn/None/symmetric_false.npz ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - - path: raw_data/ - hash: md5 - md5: d897229dd67895957a0a4330ce95b09a.dir - size: 42279674 - nfiles: 4 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: 
use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/model/gzip_knn/None/symmetric_false.npz - hash: md5 - md5: 0d3f08d9c6cb8ddc6d3e68f8208c9bc5 - size: 821 - - path: ddos/reports/train/symmetric_false/score_dict.json - hash: md5 - md5: ba81be29d56943d6d573597c93ba8081 - size: 412 - test_each_compressor@gzip-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=kdd_nsl - data=kdd_nsl 
dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/gzip.npz - model.init.compressor=gzip model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/gzip - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: 
false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/gzip/score_dict.json - hash: md5 - md5: b3f76b5e7fe68821d9336c4968888b08 - size: 431 - test_each_compressor@gzip-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=truthseeker - data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random - model.init.distance_matrix=truthseeker/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10 - hydra.run.dir=truthseeker/logs/compressor/gzip ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: 
sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/gzip/score_dict.json - hash: md5 - md5: df9b8a302dfb3b85b5c3c7623d86383e - size: 434 - test_each_compressor@gzip-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=sms_spam - data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random - model.init.distance_matrix=sms_spam/model/gzip_knn/None/gzip.npz model.init.compressor=gzip model.init.m=10 - hydra.run.dir=sms_spam/logs/compressor/gzip ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: 
sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/gzip/score_dict.json - hash: md5 - md5: 39a6710366ed557259ef981fc0b45a6a - size: 432 - test_each_compressor@gzip-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip files.directory=ddos - data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/gzip.npz - model.init.compressor=gzip 
model.init.m=10 hydra.run.dir=ddos/logs/compressor/gzip - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig 
- direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/gzip/score_dict.json - hash: md5 - md5: 1919cb29d6196b8dd14c01458e341a6b - size: 414 - test_each_compressor@zstd-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=kdd_nsl - data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/zstd.npz - model.init.compressor=zstd model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/zstd - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 
- train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/zstd/score_dict.json - hash: md5 - md5: 868509c201cbb0093818357427896da7 - size: 416 - test_each_compressor@zstd-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=truthseeker - data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random - model.init.distance_matrix=truthseeker/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10 - hydra.run.dir=truthseeker/logs/compressor/zstd ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: 
kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/zstd/score_dict.json - hash: md5 - md5: 89546ca3a3510fd73671341863c69cb9 - size: 434 - test_each_compressor@zstd-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=sms_spam - data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random - model.init.distance_matrix=sms_spam/model/gzip_knn/None/zstd.npz model.init.compressor=zstd model.init.m=10 - hydra.run.dir=sms_spam/logs/compressor/zstd ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 
14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: 
sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/zstd/score_dict.json - hash: md5 - md5: e5a10b0013b032b22dd6cc596a7810bb - size: 429 - test_each_compressor@zstd-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=zstd files.directory=ddos - data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/zstd.npz - model.init.compressor=zstd model.init.m=10 hydra.run.dir=ddos/logs/compressor/zstd - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - 
preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/zstd/score_dict.json - hash: md5 - md5: 898feb287504053c9de9c1a809733c4b - size: 432 - test_each_compressor@pkl-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=kdd_nsl - data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/pkl.npz - model.init.compressor=pkl model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/pkl - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: 
params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/pkl/score_dict.json - hash: md5 - md5: 3e01c227095014ab9f4665ea98e7f3b5 - size: 430 - test_each_compressor@pkl-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=truthseeker - data=truthseeker dataset=truthseeker model_name=gzip_knn model.init.method=random - model.init.distance_matrix=truthseeker/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10 - hydra.run.dir=truthseeker/logs/compressor/pkl ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - 
_target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/pkl/score_dict.json - hash: md5 - md5: 85d4598fcbe6077a465a9edeadd3843a - size: 430 - 
test_each_compressor@pkl-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=sms_spam - data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random - model.init.distance_matrix=sms_spam/model/gzip_knn/None/pkl.npz model.init.compressor=pkl model.init.m=10 - hydra.run.dir=sms_spam/logs/compressor/pkl ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: 
deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/pkl/score_dict.json - hash: md5 - md5: a4667414e7721ee7ed489df1e412e0b0 - size: 431 - test_each_compressor@pkl-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=pkl files.directory=ddos - data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/pkl.npz - model.init.compressor=pkl model.init.m=10 hydra.run.dir=ddos/logs/compressor/pkl - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: 
deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/pkl/score_dict.json - hash: md5 - md5: 340261dd836239b846699c4c687b3042 - size: 432 - test_each_compressor@bz2-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=kdd_nsl - data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/bz2.npz - model.init.compressor=bz2 model.init.m=10 hydra.run.dir=kdd_nsl/logs/compressor/bz2 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - 
test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/bz2/score_dict.json - hash: md5 - md5: 05fd4b45d252c648d4afb4ba3ffc05e4 - size: 430 - test_each_compressor@bz2-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=truthseeker - data=truthseeker dataset=truthseeker 
model_name=gzip_knn model.init.method=random - model.init.distance_matrix=truthseeker/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10 - hydra.run.dir=truthseeker/logs/compressor/bz2 ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - 
library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/bz2/score_dict.json - hash: md5 - md5: 1b3094ea4075cb1b5b8cd3f74bf0c3dc - size: 432 - test_each_compressor@bz2-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=sms_spam - data=sms_spam dataset=sms_spam model_name=gzip_knn model.init.method=random - model.init.distance_matrix=sms_spam/model/gzip_knn/None/bz2.npz model.init.compressor=bz2 model.init.m=10 - hydra.run.dir=sms_spam/logs/compressor/bz2 ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: 
deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/bz2/score_dict.json - hash: md5 - md5: 45303b7d052fb91e65c9f3ad97999b6a - size: 431 - test_each_compressor@bz2-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=bz2 files.directory=ddos - data=ddos dataset=ddos model_name=gzip_knn model.init.method=random model.init.distance_matrix=ddos/model/gzip_knn/None/bz2.npz - model.init.compressor=bz2 model.init.m=10 hydra.run.dir=ddos/logs/compressor/bz2 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - 
preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/bz2/score_dict.json - hash: md5 - md5: fdfa470b2053f561dea2e047423b54cd - size: 431 - test_each_precompute@True-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random - model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 
hydra.run.dir=kdd_nsl/logs/precompute/True - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: 
maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/precompute_True/score_dict.json - hash: md5 - md5: f5c9a9ce41a0680f1e18874d6f21bd25 - size: 433 - test_each_precompute@True-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/True.npz - +model.init.precompute=True model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/True - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - 
random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/precompute_True/score_dict.json - hash: md5 - md5: 76dcdbf7dc1fb63ce7b978c2f6bef8a2 - size: 435 - test_each_precompute@True-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/True.npz - +model.init.precompute=True model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/True - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - 
with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/precompute_True/score_dict.json - hash: md5 - md5: fe9a23520513840fe4a90fb8413e62da - size: 432 - test_each_precompute@True-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_True - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random - model.init.distance_matrix=ddos/model/gzip_knn/None/True.npz +model.init.precompute=True model.init.m=10 hydra.run.dir=ddos/logs/precompute/True - ++raise_exception=True ' - deps: - - path: 
kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: 
deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/precompute_True/score_dict.json - hash: md5 - md5: 0d72c99dc99df13629a383ca9745712e - size: 429 - test_each_precompute@False-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.method=random - model.init.distance_matrix=kdd_nsl/model/gzip_knn/None/False.npz +model.init.precompute=False model.init.m=10 hydra.run.dir=kdd_nsl/logs/precompute/False - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - 
handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/precompute_False/score_dict.json - hash: md5 - md5: d225ea006c02f56f552431e223ef6576 - size: 429 - test_each_precompute@False-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.method=random model.init.distance_matrix=truthseeker/model/gzip_knn/None/False.npz - +model.init.precompute=False model.init.m=10 hydra.run.dir=truthseeker/logs/precompute/False - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - 
device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/precompute_False/score_dict.json - hash: md5 - md5: e8094fb43b55432d298346a0a291ac71 - size: 431 - test_each_precompute@False-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.method=random model.init.distance_matrix=sms_spam/model/gzip_knn/None/False.npz - +model.init.precompute=False model.init.m=10 hydra.run.dir=sms_spam/logs/precompute/False - ++raise_exception=True ' - deps: - - path: 
kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: 
deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/precompute_False/score_dict.json - hash: md5 - md5: 0f3b13aba3cc817f2327769f36b54939 - size: 432 - test_each_precompute@False-ddos: - cmd: 'python -m deckard.layers.optimise stage=train files.name=precompute_False - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.method=random - model.init.distance_matrix=ddos/model/gzip_knn/None/False.npz +model.init.precompute=False model.init.m=10 hydra.run.dir=ddos/logs/precompute/False - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: 
use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/precompute_False/score_dict.json - hash: md5 - md5: 9cc47f921a908ad81e486980d134f453 - size: 418 - test_each_metric@levenshtein-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/levenshtein.npz - hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/levenshtein ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - 
data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/levenshtein/score_dict.json - hash: md5 - md5: 4f517489b794c13bbbbb477bd7b14ea8 - size: 248 - test_each_metric@levenshtein-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1 - hydra.run.dir=truthseeker/logs/metric/levenshtein ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - 
path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: 
truthseeker/reports/train/levenshtein/score_dict.json - hash: md5 - md5: 2f0fa43167cde43c2d8c901ee6bc360d - size: 250 - test_each_metric@levenshtein-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/levenshtein.npz hydra.sweeper.n_jobs=1 - hydra.run.dir=sms_spam/logs/metric/levenshtein ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - 
name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/levenshtein/score_dict.json - hash: md5 - md5: bb8456e5a2457e841619d5750922bd0c - size: 246 - test_each_metric@levenshtein-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=levenshtein files.name=levenshtein - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/levenshtein.npz - hydra.sweeper.n_jobs=1 hydra.run.dir=ddos/logs/metric/levenshtein ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - 
predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/levenshtein/score_dict.json - hash: md5 - md5: 1956a0651292bf6919a103e46c0c5906 - size: 248 - test_each_metric@ratio-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/ratio.npz - hydra.sweeper.n_jobs=1 hydra.run.dir=kdd_nsl/logs/metric/ratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv 
- sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/ratio/score_dict.json - hash: md5 - md5: 841058c500666af10a3a84fd7769e53d - size: 244 - test_each_metric@ratio-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train 
model.init.metric=ratio files.name=ratio - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=truthseeker/logs/metric/ratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - 
metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/ratio/score_dict.json - hash: md5 - md5: 5cbc24c928a073a9459428d4e1984ba1 - size: 426 - test_each_metric@ratio-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/ratio.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=sms_spam/logs/metric/ratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - 
nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/ratio/score_dict.json - hash: md5 - md5: b8ea7bf8de9af2250f1a2c84695be1f9 - size: 425 - test_each_metric@ratio-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=ratio files.name=ratio - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/ratio.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/ratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: 
sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/ratio/score_dict.json - hash: md5 - md5: 5f9750a5729db8f4912f50a8610fc48c - size: 429 - test_each_metric@hamming-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/hamming.npz - hydra.sweeper.n_jobs=8 
hydra.run.dir=kdd_nsl/logs/metric/hamming ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: 
maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/hamming/score_dict.json - hash: md5 - md5: ed699605a76c4116a461994f139da237 - size: 429 - test_each_metric@hamming-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=truthseeker/logs/metric/hamming ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 
10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/hamming/score_dict.json - hash: md5 - md5: 8a3f87734f208a61bc27114729fd4fd6 - size: 432 - test_each_metric@hamming-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/hamming.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=sms_spam/logs/metric/hamming ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: 
cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/hamming/score_dict.json - hash: md5 - md5: 0c0988090568dc526d0137ff7e38ca6a - size: 428 - test_each_metric@hamming-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=hamming files.name=hamming - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/hamming.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/hamming ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: 
params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: 
ddos/reports/train/hamming/score_dict.json - hash: md5 - md5: 949f7ea27f2521fbbb2b05ec3a111346 - size: 428 - test_each_metric@jaro-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - 
with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/jaro/score_dict.json - hash: md5 - md5: 3bd4e5c89097070d439c3f13359ff369 - size: 428 - test_each_metric@jaro-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=truthseeker/logs/metric/jaro ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: 
reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/jaro/score_dict.json - hash: md5 - md5: b86d70f18ea7ee85132f4d8407058d60 - size: 429 - test_each_metric@jaro-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro files.name=jaro - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=sms_spam/logs/metric/jaro ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: 
deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/jaro/score_dict.json - hash: md5 - md5: b7550248d10852d10a16610f707ea50f - size: 429 - test_each_metric@jaro-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro 
files.name=jaro - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - 
library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/jaro/score_dict.json - hash: md5 - md5: e7987cb2d248f7eaa20a842bbcacc442 - size: 430 - test_each_metric@jaro_winkler-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/jaro_winkler.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/jaro_winkler ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: 
deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/jaro_winkler/score_dict.json - hash: md5 - md5: a44e09663d05f8330352712ccfd72f17 - size: 428 - test_each_metric@jaro_winkler-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=truthseeker/logs/metric/jaro_winkler ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: 
sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/jaro_winkler/score_dict.json - hash: md5 - md5: 2a80298804f36bc7af477e11ff9f6679 - size: 428 - test_each_metric@jaro_winkler-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - 
model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/jaro_winkler.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=sms_spam/logs/metric/jaro_winkler ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: 
deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: sms_spam/reports/train/jaro_winkler/score_dict.json - hash: md5 - md5: 8b7d0f92e14d74042fb8cd907e3a8274 - size: 430 - test_each_metric@jaro_winkler-ddos: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=jaro_winkler files.name=jaro_winkler - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model.init.distance_matrix=ddos/model/gzip_knn/ncd/jaro_winkler.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=ddos/logs/metric/jaro_winkler ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - 
_target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/reports/train/jaro_winkler/score_dict.json - hash: md5 - md5: aa4130c79130ddbaaebaa35a1cae7d91 - size: 426 - test_each_metric@seqratio-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model.init.distance_matrix=kdd_nsl/model/gzip_knn/ncd/seqratio.npz - hydra.sweeper.n_jobs=8 hydra.run.dir=kdd_nsl/logs/metric/seqratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: 
sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/reports/train/seqratio/score_dict.json - hash: md5 - md5: 9075115a02136aaa59bd87074589ce42 - size: 430 - test_each_metric@seqratio-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_knn - model.init.distance_matrix=truthseeker/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=truthseeker/logs/metric/seqratio 
++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: 
sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/reports/train/seqratio/score_dict.json - hash: md5 - md5: ac2bdff9261ce4c9e511294dd69b19f8 - size: 434 - test_each_metric@seqratio-sms_spam: - cmd: 'python -m deckard.layers.optimise stage=train model.init.metric=seqratio files.name=seqratio - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model.init.distance_matrix=sms_spam/model/gzip_knn/ncd/seqratio.npz hydra.sweeper.n_jobs=8 - hydra.run.dir=sms_spam/logs/metric/seqratio ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 14173762472fe294a1d3228b4ee83d4b - size: 431 - - path: params.yaml - hash: md5 - md5: 4999b48c21cb63a45801003d03576594 - size: 2082 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 2 - library: sklearn - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 10 - train_size: 10 - 
sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: - k: 1 - m: -1 - method: - metric: ncd - test_each_method@ddos-random: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3 - data.sample.train_size=100 files.name=random files.directory=ddos data=ddos - dataset=ddos model_name=random hydra.run.dir=ddos/logs/method/random ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: 
deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/random - hash: md5 - md5: 3bfcc27fd44bf9333be7081f3fceb94c.dir - size: 8340 - nfiles: 4 - - path: ddos/reports/train/random/score_dict.json - hash: md5 - md5: 218449c8e2b7425707008d01e751eee4 - size: 281 - test_each_method@ddos-medoid: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=3 - data.sample.train_size=100 files.name=medoid files.directory=ddos data=ddos - dataset=ddos model_name=medoid hydra.run.dir=ddos/logs/method/medoid ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip 
- name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/medoid - hash: md5 - md5: cab03f71d3883157c103a207662f0f01.dir - size: 8377 - nfiles: 4 - - path: ddos/reports/train/medoid/score_dict.json - hash: md5 - md5: eb281dc186936044bcf39edf3b5c2a97 - size: 283 - test_each_method@ddos-sum: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3 - data.sample.train_size=100 files.name=sum files.directory=ddos data=ddos dataset=ddos - model_name=sum hydra.run.dir=ddos/logs/method/sum ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: 
deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/sum - hash: md5 - md5: 1acd35c26f1f01c1d97695be4df4be9f.dir - size: 8320 - nfiles: 4 - - path: ddos/reports/train/sum/score_dict.json - hash: md5 - md5: d8ee90602dcf3e5e3d1541fd051d8c25 - size: 283 - test_each_method@ddos-svc: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3 - data.sample.train_size=100 files.name=svc files.directory=ddos data=ddos dataset=ddos - model_name=svc hydra.run.dir=ddos/logs/method/svc ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - 
random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/svc - hash: md5 - md5: ff1e2d4db8fbd074fae27c28e6d7efab.dir - size: 8317 - nfiles: 4 - - path: ddos/reports/train/svc/score_dict.json - hash: md5 - md5: 02086eaaafb2de9549a587e0cac8d44f - size: 280 - test_each_method@ddos-condensed: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=condensed model.init.m=1 - files.name=condensed files.directory=ddos data=ddos dataset=ddos model_name=condensed - hydra.run.dir=ddos/logs/method/condensed ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: 
raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/condensed - hash: md5 - md5: 5dfc9ebfe1c6f3e496814c86a05a5329.dir - size: 10117 - nfiles: 4 - - path: ddos/reports/train/condensed/score_dict.json - hash: md5 - md5: 56bcddf54558d9cdd1a7587878aceffa - size: 284 - test_each_method@ddos-hardness: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3 - data.sample.train_size=100 files.name=hardness files.directory=ddos data=ddos - dataset=ddos model_name=hardness hydra.run.dir=ddos/logs/method/hardness ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: 
deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/hardness - hash: md5 - md5: 92679e897538c5e98e89f11ca456f483.dir - size: 8413 - nfiles: 4 - - path: ddos/reports/train/hardness/score_dict.json - hash: md5 - md5: 24a77200255cec8b4ec9f1877188fdda - size: 281 - test_each_method@ddos-nearmiss: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3 - data.sample.train_size=100 files.name=nearmiss files.directory=ddos data=ddos - dataset=ddos model_name=nearmiss hydra.run.dir=ddos/logs/method/nearmiss ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - 
params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/nearmiss - hash: md5 - md5: 84fc6455a5c576fa04c36919c33ae8fd.dir - size: 8416 - nfiles: 4 - - path: ddos/reports/train/nearmiss/score_dict.json - hash: md5 - md5: b4602181657a738a97631883018e221a - size: 284 - test_each_method@truthseeker-svc: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=svc model.init.m=3 - data.sample.train_size=100 files.name=svc files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=svc hydra.run.dir=truthseeker/logs/method/svc - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - 
_target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/svc - hash: md5 - md5: 7f9ad95f5b5a7d8ea8a41d09560bca7e.dir - size: 10252 - nfiles: 4 - - path: truthseeker/reports/train/svc/score_dict.json - hash: md5 - md5: dca27d752d8d9db2b52a61d9e0d9bebf - size: 283 - test_each_method@truthseeker-medoid: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=medoid model.init.m=3 - data.sample.train_size=100 files.name=medoid files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=medoid hydra.run.dir=truthseeker/logs/method/medoid - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - 
train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/medoid - hash: md5 - md5: 57b1e2e154ae8653331898992d0d7f7c.dir - size: 10316 - nfiles: 4 - - path: truthseeker/reports/train/medoid/score_dict.json - hash: md5 - md5: a728020aeb632257e52cc9b13337870e - size: 284 - test_each_method@truthseeker-sum: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=sum model.init.m=3 - data.sample.train_size=100 files.name=sum files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=sum hydra.run.dir=truthseeker/logs/method/sum - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: 
deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/sum - hash: md5 - md5: b8934f0660e9e6043e5a7117d2e3d462.dir - size: 10252 - nfiles: 4 - - path: truthseeker/reports/train/sum/score_dict.json - hash: md5 - md5: 0a4117f35aab6ec4b41ac526f8715aa2 - size: 283 - test_each_method@truthseeker-random: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=random model.init.m=3 - data.sample.train_size=100 files.name=random files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=random hydra.run.dir=truthseeker/logs/method/random - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: 
deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/random - hash: md5 - md5: a77f4e67f85e529063b18617cda5525a.dir - size: 10289 - nfiles: 4 - - path: truthseeker/reports/train/random/score_dict.json - hash: md5 - md5: 08f3cc499d61caaa4ab912af1a2ff558 - size: 283 - test_each_method@truthseeker-nearmiss: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=nearmiss model.init.m=3 - data.sample.train_size=100 files.name=nearmiss files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=nearmiss hydra.run.dir=truthseeker/logs/method/nearmiss - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 
8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/nearmiss - hash: md5 - md5: 6ea3f0a574d7abd052e3ee5466356e13.dir - size: 10359 - nfiles: 4 - - path: truthseeker/reports/train/nearmiss/score_dict.json - hash: md5 - md5: f03918d65cac7f21e210a14be8ee1373 - size: 285 - test_each_method@truthseeker-hardness: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=hardness model.init.m=3 - data.sample.train_size=100 files.name=hardness files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=hardness hydra.run.dir=truthseeker/logs/method/hardness - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: 
md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/hardness - hash: md5 - md5: c5ea09925ae34a0fee42f1ec06d88090.dir - size: 10355 - nfiles: 4 - - path: truthseeker/reports/train/hardness/score_dict.json - hash: md5 - md5: 87bdbb0cafd4462b87035af79efc81c5 - size: 281 - test_each_method@truthseeker-knn: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3 - data.sample.train_size=100 files.name=knn files.directory=truthseeker data=truthseeker - dataset=truthseeker model_name=knn hydra.run.dir=truthseeker/logs/method/knn - ++raise_exception=True ' - 
deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 064e5bb42979e36c917c538b2a7bc0cc - size: 489 - - path: params.yaml - hash: md5 - md5: 8e937140db56a135e97c05461c573520 - size: 1345 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/method/knn - hash: md5 - md5: 5c5fe8f17151816b01d863f51db3d01a.dir - size: 10254 - nfiles: 4 - - path: truthseeker/reports/train/knn/score_dict.json - hash: md5 - md5: 4157a5deabda43d207a543b9f038b5af - size: 285 - test_each_method@ddos-knn: - cmd: 'python -m deckard.layers.optimise stage=train +model.init.sampling_method=knn model.init.m=3 - data.sample.train_size=100 files.name=knn files.directory=ddos data=ddos dataset=ddos - model_name=knn hydra.run.dir=ddos/logs/method/knn 
++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: ddos/logs/method/knn - hash: md5 - md5: 8d73125fea91a47efc49ba2b4a68e1fe.dir - size: 8319 - nfiles: 4 - - path: ddos/reports/train/knn/score_dict.json - hash: md5 - md5: fb77e1c8e53bac0e077d2140f1abc6d6 - size: 282 - condense@sms_spam-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.test_size=100 model_name=gzip_logistic 
model=gzip_logistic hydra.sweeper.study_name=condense_gzip_logistic_sms_spam - hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=sms_spam/logs/condense/gzip_logistic/ - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/study.csv - ++data.sample.train_size='int(interval(30, 1000))' ++data.sample.random_state='int(interval(10000, - 20000))' ++data.sample.stratify=True model.init.m='tag(log, interval(.1, 1))' - +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn files.directory=sms_spam - files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=32 --config-name - gzip_logistic --multirun - deps: - - path: conf/model/best_gzip_logistic_sms_spam.yaml - hash: md5 - md5: 026fca7fe5d7bb75c4a3ae245f86a2c2 - size: 332 - - path: sms_spam/logs/method/ - hash: md5 - md5: e8e327bbd5859a6c1c362fd482435727.dir - size: 69377 - nfiles: 24 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? 
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: int(interval(20, 1000)) - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.1, 1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: sms_spam/logs/condense/gzip_logistic/ - hash: md5 - md5: 9496098bd1497b6c46124e40e665ee74.dir - size: 14280 - nfiles: 5 - - path: sms_spam/reports/condense/gzip_logistic/ - hash: md5 - md5: c7e2a43c1dc170c3d593825f57ad0e9b.dir - size: 2707 - nfiles: 3 - condense@truthseeker-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.test_size=100 model_name=gzip_svc model=gzip_svc hydra.sweeper.study_name=condense_gzip_svc_truthseeker - hydra.sweeper.n_trials=1 hydra.sweeper.n_jobs=32 hydra.sweep.dir=truthseeker/logs/condense/gzip_svc/ - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/study.csv ++data.sample.train_size='int(interval(30, - 1000))' ++data.sample.random_state='int(interval(10000, 20000))' ++data.sample.stratify=True - model.init.m='tag(log, interval(.1, 1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=truthseeker files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=32 - --config-name gzip_svc --multirun - deps: - - path: conf/model/best_gzip_svc_truthseeker.yaml - hash: md5 - md5: 97d9d5857744b1cc077513ac5a659f62 - size: 302 - - path: truthseeker/logs/method/ - hash: md5 - md5: 6f6693db2bb9520dc7956f0d0c003e23.dir - 
size: 116543 - nfiles: 44 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: int(interval(20, 1000)) - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.1, 1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: truthseeker/logs/condense/gzip_svc/ - hash: md5 - md5: bd7cbae34fd6feecf60a49cb537b0f80.dir - size: 13751 - nfiles: 5 - - path: truthseeker/reports/condense/gzip_svc/ - hash: md5 - md5: a24584cdc3464b86b6ff88b90dc62e5e.dir - size: 2701 - nfiles: 3 - condense@sms_spam-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.test_size=100 model_name=gzip_svc model=best_gzip_svc_sms_spam hydra.sweeper.study_name=condense_gzip_svc_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/gzip_svc/ - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/study.csv model.init.m='tag(log, - interval(.01, .1))' 
+model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=sms_spam files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_svc_sms_spam.yaml - hash: md5 - md5: 771cd8e3b1368f0fbb30e518002db80f - size: 317 - - path: sms_spam/logs/method/ - hash: md5 - md5: e8e327bbd5859a6c1c362fd482435727.dir - size: 69377 - nfiles: 24 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: sms_spam/logs/condense/gzip_svc/ - hash: md5 - md5: c6ef4ecf2bec03894b2f2018cffc0888.dir - size: 1597147 - nfiles: 513 - - path: sms_spam/reports/condense/gzip_svc/ - hash: md5 - md5: aff4ca5c41e7043fe0d36b4a669ad6a7.dir - size: 344414 - nfiles: 381 - condense@ddos-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos 
data.sample.test_size=100 - model_name=gzip_svc model=best_gzip_svc_ddos hydra.sweeper.study_name=condense_gzip_svc_ddos - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_svc/ - hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/study.csv model.init.m='tag(log, - interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=ddos files.reports=reports/condense/gzip_svc/ hydra.launcher.n_jobs=16 - ++raise_exception=True --config-name condense --multirun - deps: - - path: conf/model/best_gzip_svc_ddos.yaml - hash: md5 - md5: f2ec5b2ff8103b93ca61a5b86888a3e6 - size: 305 - - path: ddos/logs/method/ - hash: md5 - md5: 7128c67930147170f54fb89880528199.dir - size: 120518 - nfiles: 48 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? 
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: ddos/logs/condense/gzip_svc/ - hash: md5 - md5: 98f11cc76f9f370871bfb325ec4186e4.dir - size: 1589126 - nfiles: 513 - - path: ddos/reports/condense/gzip_svc/ - hash: md5 - md5: 87ca8778bbdb8363a1e237019c87ebf5.dir - size: 345583 - nfiles: 384 - condense@sms_spam-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_sms_spam hydra.sweeper.study_name=condense_gzip_knn_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/gzip_knn/ - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/study.csv model.init.m='tag(log, - interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=sms_spam files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_knn_sms_spam.yaml - hash: md5 - md5: 430e2be20ddaa39808a6739627a98d77 - size: 259 - - path: sms_spam/logs/method/ - hash: md5 - md5: e8e327bbd5859a6c1c362fd482435727.dir - size: 69377 - nfiles: 24 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: sms_spam/logs/condense/gzip_knn/ - hash: md5 - md5: a45625dcc1d1cc1f1e20d19440e1cdf1.dir - size: 1559584 - nfiles: 513 - - path: sms_spam/reports/condense/gzip_knn/ - hash: md5 - md5: 0ac87faa8d16d77b4e7d5a96cfdde177.dir - size: 335094 - nfiles: 384 - compile@sms_spam-gzip_knn: - cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_knn --results_file - sms_spam/reports/gzip_knn.csv - deps: - - path: sms_spam/reports/gzip_knn/ - hash: md5 - md5: 89e3b68400367dee648064784adb9796.dir - size: 1499301 - nfiles: 1337 - outs: - - path: sms_spam/reports/gzip_knn.csv - hash: md5 - md5: ee7ee47f5ee27acca9e58b9249ecb954 - size: 695526 - compile@truthseeker-gzip_knn: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_knn --results_file - truthseeker/reports/gzip_knn.csv - deps: - - path: truthseeker/reports/gzip_knn/ - hash: md5 - md5: 
e5702237f62021b85240717035b53d81.dir - size: 1537318 - nfiles: 1325 - outs: - - path: truthseeker/reports/gzip_knn.csv - hash: md5 - md5: 183afe36078f60e3e478f3813b1b52a7 - size: 711959 - compile@kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_knn --results_file - kdd_nsl/reports/gzip_knn.csv - deps: - - path: kdd_nsl/reports/gzip_knn/ - hash: md5 - md5: 4dfe630ff7f6f036220f2b9aa5b3c6b1.dir - size: 4225577 - nfiles: 3608 - outs: - - path: kdd_nsl/reports/gzip_knn.csv - hash: md5 - md5: 17f27e4404093a5b50a74ca0af24e4db - size: 1964725 - compile@truthseeker-gzip_svc: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_svc --results_file - truthseeker/reports/gzip_svc.csv - deps: - - path: truthseeker/reports/gzip_svc/ - hash: md5 - md5: e6e273bb143c7a8949d5be4acca87eb9.dir - size: 1536370 - nfiles: 1725 - outs: - - path: truthseeker/reports/gzip_svc.csv - hash: md5 - md5: 746aae81f4af3c8ce4c8c7e3c3e866b1 - size: 870818 - compile@truthseeker-gzip_logistic: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/gzip_logistic --results_file - truthseeker/reports/gzip_logistic.csv - deps: - - path: truthseeker/reports/gzip_logistic/ - hash: md5 - md5: 5074027dccab644424973514ae7c8922.dir - size: 2225784 - nfiles: 1473 - outs: - - path: truthseeker/reports/gzip_logistic.csv - hash: md5 - md5: ed858c429ea35f3dac4eca9c52e036ce - size: 786129 - compile@ddos-gzip_logistic: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_logistic --results_file - ddos/reports/gzip_logistic.csv - deps: - - path: ddos/reports/gzip_logistic/ - hash: md5 - md5: 6ce8a2aa8cc08ccde4467403dec1a124.dir - size: 6278656 - nfiles: 4845 - outs: - - path: ddos/reports/gzip_logistic.csv - hash: md5 - md5: 7ff452295887d9c84250c7375b7ea58a - size: 2606734 - compile@ddos-gzip_knn: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_knn --results_file - 
ddos/reports/gzip_knn.csv - deps: - - path: ddos/reports/gzip_knn/ - hash: md5 - md5: ce89d46c7a34959f9d39a3d1e6ad8911.dir - size: 5724814 - nfiles: 5690 - outs: - - path: ddos/reports/gzip_knn.csv - hash: md5 - md5: fe28ae14c5cc37ee8eb5e705c3610da8 - size: 2899113 - compile@kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_logistic --results_file - kdd_nsl/reports/gzip_logistic.csv - deps: - - path: kdd_nsl/reports/gzip_logistic/ - hash: md5 - md5: bca1b51ebae4e3ef166f9424a0f8c1ff.dir - size: 4923952 - nfiles: 3945 - outs: - - path: kdd_nsl/reports/gzip_logistic.csv - hash: md5 - md5: 07859f070e6b9246456e860d63ab4438 - size: 2149350 - compile@kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/gzip_svc --results_file - kdd_nsl/reports/gzip_svc.csv - deps: - - path: kdd_nsl/reports/gzip_svc/ - hash: md5 - md5: 907ec439b02a0d2b3ba36d54e250ff89.dir - size: 4798455 - nfiles: 4393 - outs: - - path: kdd_nsl/reports/gzip_svc.csv - hash: md5 - md5: b25b5925936e935b62cdc6bd5b96d8d3 - size: 2257942 - compile@sms_spam-gzip_logistic: - cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/gzip_logistic --results_file - sms_spam/reports/gzip_logistic.csv - deps: - - path: sms_spam/reports/gzip_logistic/ - hash: md5 - md5: c70a60ca7e7e433d1cbd21bfddd26320.dir - size: 2212768 - nfiles: 1438 - outs: - - path: sms_spam/reports/gzip_logistic.csv - hash: md5 - md5: 34643e6fbb37caef6b6f9054cb1b5203 - size: 754980 - compile@ddos-gzip_svc: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/gzip_svc --results_file - ddos/reports/gzip_svc.csv - deps: - - path: ddos/reports/gzip_svc/ - hash: md5 - md5: 3b3fdb3e3d2321e8ee5dc36311626231.dir - size: 6101649 - nfiles: 5283 - outs: - - path: ddos/reports/gzip_svc.csv - hash: md5 - md5: 7bd491b47bf7d5f373cb825e9e3d0c4c - size: 2689051 - compile@sms_spam-gzip_svc: - cmd: python -m deckard.layers.compile --report_folder 
sms_spam/reports/gzip_svc --results_file - sms_spam/reports/gzip_svc.csv - deps: - - path: sms_spam/reports/gzip_svc/ - hash: md5 - md5: 52af2b025a2aafa3e4a78db0bf221f59.dir - size: 2173475 - nfiles: 1536 - outs: - - path: sms_spam/reports/gzip_svc.csv - hash: md5 - md5: 12c2eec80495a5fb326dbed7c4cfe382 - size: 758618 - clean@truthseeker-gzip_svc: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_svc.csv - -o truthseeker/plots/clean/gzip_svc.csv -c conf/clean.yaml - deps: - - path: truthseeker/reports/gzip_svc.csv - hash: md5 - md5: 746aae81f4af3c8ce4c8c7e3c3e866b1 - size: 870818 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: truthseeker/plots/clean/gzip_svc.csv - hash: md5 - md5: cdb96b7ba00dc0bf6b4c8db38311447b - size: 679004 - clean@kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_svc.csv -o kdd_nsl/plots/clean/gzip_svc.csv - -c conf/clean.yaml - deps: - - path: kdd_nsl/reports/gzip_svc.csv - hash: md5 - md5: b25b5925936e935b62cdc6bd5b96d8d3 - size: 2257942 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: kdd_nsl/plots/clean/gzip_svc.csv - hash: md5 - md5: a359fb46b83265dec352e0af17f19cb2 - size: 1771361 - clean@kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.clean_data -i 
kdd_nsl/reports/gzip_knn.csv -o kdd_nsl/plots/clean/gzip_knn.csv - -c conf/clean.yaml - deps: - - path: kdd_nsl/reports/gzip_knn.csv - hash: md5 - md5: 17f27e4404093a5b50a74ca0af24e4db - size: 1964725 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: kdd_nsl/plots/clean/gzip_knn.csv - hash: md5 - md5: 686b0f04494630491244a6ead99949b7 - size: 996268 - clean@ddos-gzip_knn: - cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_knn.csv -o ddos/plots/clean/gzip_knn.csv - -c conf/clean.yaml - deps: - - path: ddos/reports/gzip_knn.csv - hash: md5 - md5: fe28ae14c5cc37ee8eb5e705c3610da8 - size: 2899113 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: ddos/plots/clean/gzip_knn.csv - hash: md5 - md5: ad6773d0af82535d3c525f8bf405bbfe - size: 1919757 - clean@ddos-gzip_svc: - cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_svc.csv -o ddos/plots/clean/gzip_svc.csv - -c conf/clean.yaml - deps: - - path: ddos/reports/gzip_svc.csv - hash: md5 - md5: 7bd491b47bf7d5f373cb825e9e3d0c4c - size: 2689051 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: 
Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: ddos/plots/clean/gzip_svc.csv - hash: md5 - md5: 45515bad8f1a4167a7a64d0a3d62464e - size: 1842449 - clean@kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_logistic.csv - -o kdd_nsl/plots/clean/gzip_logistic.csv -c conf/clean.yaml - deps: - - path: kdd_nsl/reports/gzip_logistic.csv - hash: md5 - md5: 07859f070e6b9246456e860d63ab4438 - size: 2149350 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: kdd_nsl/plots/clean/gzip_logistic.csv - hash: md5 - md5: 82d8bddbe4db8eb6835d00931af7fc12 - size: 1456814 - clean@truthseeker-gzip_knn: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_knn.csv - -o truthseeker/plots/clean/gzip_knn.csv -c conf/clean.yaml - deps: - - path: truthseeker/reports/gzip_knn.csv - hash: md5 - md5: 183afe36078f60e3e478f3813b1b52a7 - size: 711959 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: truthseeker/plots/clean/gzip_knn.csv - hash: md5 - md5: dbbbb4c6ab13f540b1b4d9ee23d4a91a - size: 354842 - clean@ddos-gzip_logistic: - cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_logistic.csv -o - 
ddos/plots/clean/gzip_logistic.csv -c conf/clean.yaml - deps: - - path: ddos/reports/gzip_logistic.csv - hash: md5 - md5: 7ff452295887d9c84250c7375b7ea58a - size: 2606734 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: ddos/plots/clean/gzip_logistic.csv - hash: md5 - md5: a7d5cf7362711724ae19bba3becf66d2 - size: 1523208 - clean@sms_spam-gzip_knn: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_knn.csv -o - sms_spam/plots/clean/gzip_knn.csv -c conf/clean.yaml - deps: - - path: sms_spam/reports/gzip_knn.csv - hash: md5 - md5: ee7ee47f5ee27acca9e58b9249ecb954 - size: 695526 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: sms_spam/plots/clean/gzip_knn.csv - hash: md5 - md5: 020bbec4f2594935bd33efdcdf90eba7 - size: 358497 - clean@sms_spam-gzip_logistic: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_logistic.csv - -o sms_spam/plots/clean/gzip_logistic.csv -c conf/clean.yaml - deps: - - path: sms_spam/reports/gzip_logistic.csv - hash: md5 - md5: 34643e6fbb37caef6b6f9054cb1b5203 - size: 754980 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - 
bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: sms_spam/plots/clean/gzip_logistic.csv - hash: md5 - md5: d9a1be37cfb498a7d87c116db6f553e2 - size: 497702 - clean@sms_spam-gzip_svc: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/gzip_svc.csv -o - sms_spam/plots/clean/gzip_svc.csv -c conf/clean.yaml - deps: - - path: sms_spam/reports/gzip_svc.csv - hash: md5 - md5: 12c2eec80495a5fb326dbed7c4cfe382 - size: 758618 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: sms_spam/plots/clean/gzip_svc.csv - hash: md5 - md5: 4455964d2014f4705b4ea3191cef40b2 - size: 588874 - clean@truthseeker-gzip_logistic: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_logistic.csv - -o truthseeker/plots/clean/gzip_logistic.csv -c conf/clean.yaml - deps: - - path: truthseeker/reports/gzip_logistic.csv - hash: md5 - md5: 276fcd9d025d60418d6a92db6bee859e - size: 748894 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: truthseeker/plots/clean/gzip_logistic.csv - hash: md5 - md5: 82450f3b94f517f586b35ed85b494add - size: 417258 - copy@sms_spam: - cmd: 'rm -rf ~/Gzip-KNN/figs/sms_spam/ && mkdir 
-p ~/Gzip-KNN/figs/sms_spam/ && - cp -r sms_spam/plots/* ~/Gzip-KNN/figs/sms_spam/ ' - deps: - - path: sms_spam/plots/ - hash: md5 - md5: b4562b1ad06e680bf0247d4e8dab85c1.dir - size: 10160120 - nfiles: 19 - copy@truthseeker: - cmd: 'rm -rf ~/Gzip-KNN/figs/truthseeker/ && mkdir -p ~/Gzip-KNN/figs/truthseeker/ - && cp -r truthseeker/plots/* ~/Gzip-KNN/figs/truthseeker/ ' - deps: - - path: truthseeker/plots/ - hash: md5 - md5: 47a062972487c796e962fa241d4bf108.dir - size: 8761443 - nfiles: 18 - copy@kdd_nsl: - cmd: 'rm -rf ~/Gzip-KNN/figs/kdd_nsl/ && mkdir -p ~/Gzip-KNN/figs/kdd_nsl/ && - cp -r kdd_nsl/plots/* ~/Gzip-KNN/figs/kdd_nsl/ ' - deps: - - path: kdd_nsl/plots/ - hash: md5 - md5: 526bfd7a3ffd1b1cee332632d79a96f8.dir - size: 13281984 - nfiles: 18 - copy@ddos: - cmd: 'rm -rf ~/Gzip-KNN/figs/ddos/ && mkdir -p ~/Gzip-KNN/figs/ddos/ && cp -r - ddos/plots/* ~/Gzip-KNN/figs/ddos/ ' - deps: - - path: ddos/plots/ - hash: md5 - md5: 22ac4455d4f24b7a0624f5d670f81e24.dir - size: 15551940 - nfiles: 19 - condense@truthseeker-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_truthseeker - hydra.sweeper.study_name=condense_gzip_knn_truthseeker hydra.sweeper.n_trials=128 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/study.csv - model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=truthseeker files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_knn_truthseeker.yaml - hash: md5 - md5: 79baf4709c4a5f2535059ef8d1b6a082 - size: 258 - - path: truthseeker/logs/method/ - hash: md5 - md5: 6f6693db2bb9520dc7956f0d0c003e23.dir - size: 116543 - nfiles: 44 - params: - conf/condense.yaml: - hydra: - run: - dir: 
${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: truthseeker/logs/condense/gzip_knn/ - hash: md5 - md5: 029aa9a618d0edd127756b0b724a1742.dir - size: 1568426 - nfiles: 513 - - path: truthseeker/reports/condense/gzip_knn/ - hash: md5 - md5: ef4ee3a0a4c954cea9b4f557a216e421.dir - size: 353591 - nfiles: 374 - plot@ddos-gzip_knn: - cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/clean_gzip_knn.csv -c - conf/plots.yaml - deps: - - path: ddos/plots/clean_gzip_knn.csv - hash: md5 - md5: c730af75faf35ba958b15b2da82b25be - size: 451405 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - 
Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: train_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: predict_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: 
data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - outs: - - path: ddos/plots/metric_vs_accuracy.pdf - hash: md5 - md5: b8279045dcf3a1fc574578e991427e73 - size: 23629 - - path: ddos/plots/metric_vs_predict_time.pdf - hash: md5 - md5: 1512c0c363753afc28a3c093cc8e252c - size: 22784 - - path: ddos/plots/metric_vs_train_time.pdf - hash: md5 - md5: dd17a922e53b59b2d9b2d91c1237bb54 - size: 22329 - - path: ddos/plots/models_vs_accuracy.pdf - hash: md5 - md5: bbbc08a7161735b6174984688003809f - size: 13970 - - path: ddos/plots/models_vs_predict_time.pdf - hash: md5 - md5: 9b6acd61045df87af51626be2bdff7ab - size: 15507 - - path: ddos/plots/models_vs_train_time.pdf - hash: md5 - md5: 6a2303b531dfc78f20d9bf3dc62d8d42 - size: 16118 - - path: ddos/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 9cd54391a16400664710c9f0589a4d5f - size: 22044 - - path: ddos/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 0397f39f681300638b6fcc7c2d4e3bda - size: 21616 - plot@kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/clean_gzip_knn.csv -c - conf/plots.yaml - deps: - - path: kdd_nsl/plots/clean_gzip_knn.csv - hash: md5 - md5: 
1c001f5a7008b439ee4c7946998cbe25 - size: 1002255 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: train_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: predict_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: 
accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - outs: - - path: kdd_nsl/plots/metric_vs_accuracy.pdf - hash: md5 - md5: a146ab8c45d548ecd6c285a40c5d49e7 - size: 23228 - - path: kdd_nsl/plots/metric_vs_predict_time.pdf - hash: md5 - md5: 59f7befb701cf34c5bf62a78206d7867 - size: 22642 - - path: kdd_nsl/plots/metric_vs_train_time.pdf - hash: md5 - md5: 938036a897293cbf7dc0b4caa19a5596 - size: 22182 - - path: kdd_nsl/plots/models_vs_accuracy.pdf - hash: md5 - md5: 0dad2f21fc6049c3a24972a35514ee71 - size: 15035 - - path: kdd_nsl/plots/models_vs_predict_time.pdf - hash: md5 - md5: 4361ffb492bff25d3cde95fcdb941ced - size: 16578 - - path: kdd_nsl/plots/models_vs_train_time.pdf - hash: md5 - md5: 416681afbf2e0e87dcc7dfe97f0835fc - size: 16239 - - path: kdd_nsl/plots/symmetric_vs_metric.pdf - hash: md5 - 
md5: 05a28fb9adea7b847f396fdd96c37d02 - size: 22208 - - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 0a0a9daf98ab6efe98cb31b69cba2c65 - size: 21578 - plot@truthseeker-gzip_knn: - cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/clean_gzip_knn.csv -c - conf/plots.yaml - deps: - - path: truthseeker/plots/clean_gzip_knn.csv - hash: md5 - md5: ff0162ac672b57d59126b965580901d9 - size: 620009 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: train_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_predict_time.pdf - 
x: model_name - y: predict_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - outs: - - path: truthseeker/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 3cef9a04adf0d3378d4627c1a8b097a7 - size: 23348 - - path: truthseeker/plots/metric_vs_predict_time.pdf - hash: md5 - md5: a4a5f2426ffaf289e124fb09235e374b - size: 22838 - - path: truthseeker/plots/metric_vs_train_time.pdf - hash: md5 - md5: cda8914da9fabcfb40ea1eb0943e28d3 - 
size: 22333 - - path: truthseeker/plots/models_vs_accuracy.pdf - hash: md5 - md5: 7ef865e460d2652c873cfe333e7a308d - size: 15215 - - path: truthseeker/plots/models_vs_predict_time.pdf - hash: md5 - md5: eb57bd848d231a47615e311dbd1102b1 - size: 17930 - - path: truthseeker/plots/models_vs_train_time.pdf - hash: md5 - md5: e7bc6589ed86e8f5c3fbb5a747c652fe - size: 17739 - - path: truthseeker/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 0e7c326bef4c0d835f810c67172b2698 - size: 22057 - - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: ce21956e382cc48f0a71ef7ccfd79751 - size: 21593 - plot@sms_spam-gzip_knn: - cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/clean_gzip_knn.csv -c - conf/plots.yaml - deps: - - path: sms_spam/plots/clean_gzip_knn.csv - hash: md5 - md5: 13a5803849f7dfdefe18ba16b0a5010f - size: 448070 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - 
bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: train_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: predict_time - hue: dataset - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 
500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - outs: - - path: sms_spam/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 507715814c07145dbb140b2b6714973b - size: 23499 - - path: sms_spam/plots/metric_vs_predict_time.pdf - hash: md5 - md5: 97ec5498625837c79fc02850afba09f1 - size: 22606 - - path: sms_spam/plots/metric_vs_train_time.pdf - hash: md5 - md5: f4c9d0805ea5d0ac8e1a2210ee788d47 - size: 22104 - - path: sms_spam/plots/models_vs_accuracy.pdf - hash: md5 - md5: 2eb9ea23cba4e5b734565b7aacdcf43c - size: 14146 - - path: sms_spam/plots/models_vs_predict_time.pdf - hash: md5 - md5: b518bf6b070e7916ad71febd3d3face6 - size: 15523 - - path: sms_spam/plots/models_vs_train_time.pdf - hash: md5 - md5: 617f20892ba643f5c47077af63ae727f - size: 14895 - - path: sms_spam/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 40aa8607331327c4f667fda367defb5f - size: 22033 - - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 775c0bdfc7d9524f1e63b8879ddefccd - size: 21590 - merge@truthseeker: - cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/gzip_knn.csv - --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder - truthseeker/plots --output_file merged.csv - deps: - - path: truthseeker/plots/clean/gzip_knn.csv - hash: md5 - md5: 1f46e4b3afd875ee11939b23bd1c0852 - size: 560551 - - path: truthseeker/plots/clean/gzip_logistic.csv - hash: md5 - md5: 82450f3b94f517f586b35ed85b494add - size: 417258 - - path: truthseeker/plots/clean/gzip_svc.csv - hash: md5 - md5: cdb96b7ba00dc0bf6b4c8db38311447b - size: 679004 - outs: - - path: truthseeker/plots/merged.csv - hash: md5 - md5: a9b4f71f4d7eccde5a901730969b0bb1 - size: 1711555 - merge@sms_spam: - cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/gzip_knn.csv - --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder - sms_spam/plots --output_file merged.csv - deps: - - path: 
sms_spam/plots/clean/gzip_knn.csv - hash: md5 - md5: 020bbec4f2594935bd33efdcdf90eba7 - size: 358497 - - path: sms_spam/plots/clean/gzip_logistic.csv - hash: md5 - md5: d9a1be37cfb498a7d87c116db6f553e2 - size: 497702 - - path: sms_spam/plots/clean/gzip_svc.csv - hash: md5 - md5: 4455964d2014f4705b4ea3191cef40b2 - size: 588874 - outs: - - path: sms_spam/plots/merged.csv - hash: md5 - md5: 3e3e63943b3d62dddc79e554cb691405 - size: 1492939 - merge@ddos: - cmd: python merge.py --big_dir ddos/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file - clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder ddos/plots --output_file - merged.csv - deps: - - path: ddos/plots/clean/gzip_knn.csv - hash: md5 - md5: ad6773d0af82535d3c525f8bf405bbfe - size: 1919757 - - path: ddos/plots/clean/gzip_logistic.csv - hash: md5 - md5: a7d5cf7362711724ae19bba3becf66d2 - size: 1523208 - - path: ddos/plots/clean/gzip_svc.csv - hash: md5 - md5: 45515bad8f1a4167a7a64d0a3d62464e - size: 1842449 - outs: - - path: ddos/plots/merged.csv - hash: md5 - md5: 2fd123789b3c749a653aa9c142d23858 - size: 5465498 - merge@kdd_nsl: - cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file - clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder kdd_nsl/plots --output_file - merged.csv - deps: - - path: kdd_nsl/plots/clean/gzip_knn.csv - hash: md5 - md5: 686b0f04494630491244a6ead99949b7 - size: 996268 - - path: kdd_nsl/plots/clean/gzip_logistic.csv - hash: md5 - md5: 82d8bddbe4db8eb6835d00931af7fc12 - size: 1456814 - - path: kdd_nsl/plots/clean/gzip_svc.csv - hash: md5 - md5: a359fb46b83265dec352e0af17f19cb2 - size: 1771361 - outs: - - path: kdd_nsl/plots/merged.csv - hash: md5 - md5: 7817c0dd6f149eb072f4a5c787fa9655 - size: 4361588 - plot@kdd_nsl: - cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/merged.csv -c - conf/plots.yaml - deps: - - path: kdd_nsl/plots/merged.csv - hash: md5 - md5: 7817c0dd6f149eb072f4a5c787fa9655 - 
size: 4361588 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: boxen - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - 
hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - path: kdd_nsl/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 2abfc1441c3515f07d2e28459e730a4f - size: 24689 - - path: kdd_nsl/plots/metric_vs_predict_time.pdf - hash: md5 - md5: d91c94bf17617b79b2a417710efb9dfc - size: 23239 - - path: kdd_nsl/plots/metric_vs_train_time.pdf - hash: md5 - md5: d2c40b3e36886868c650917d02015be4 - size: 24227 - - path: kdd_nsl/plots/models_vs_accuracy.pdf - hash: md5 - md5: c6807ba0356e42159d683a2b3ab610a9 - size: 23546 - - path: kdd_nsl/plots/models_vs_predict_time.pdf - hash: md5 - md5: 2f6d79e1a5164884b87ef3f40bdafeeb - size: 19370 - - path: 
kdd_nsl/plots/models_vs_train_time.pdf - hash: md5 - md5: 30ed28915c3ff6de16fffbf8c6bdda45 - size: 18949 - - path: kdd_nsl/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 1d0bb7d03823bb54b5b12b50dbc6615c - size: 22232 - - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 802d5119895198601ba2ee24b3cc9528 - size: 21618 - plot@truthseeker: - cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/merged.csv -c - conf/plots.yaml - deps: - - path: truthseeker/plots/merged.csv - hash: md5 - md5: a9b4f71f4d7eccde5a901730969b0bb1 - size: 1711555 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: boxen - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se 
- kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - 
path: truthseeker/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 935a8c7365ac4b738a1ab222357db671 - size: 23824 - - path: truthseeker/plots/metric_vs_predict_time.pdf - hash: md5 - md5: d5095d1375ed12b1a9b9f8ce5bfee839 - size: 22984 - - path: truthseeker/plots/metric_vs_train_time.pdf - hash: md5 - md5: c6dec8707d3da6a57eb64874b8489aa1 - size: 23404 - - path: truthseeker/plots/models_vs_accuracy.pdf - hash: md5 - md5: c09acc549b30af58463a3a8af31b80d1 - size: 20437 - - path: truthseeker/plots/models_vs_predict_time.pdf - hash: md5 - md5: ff7ffac5905b059ec6670c9220caf124 - size: 18153 - - path: truthseeker/plots/models_vs_train_time.pdf - hash: md5 - md5: f48cdb573700e225810e4ed960768e57 - size: 17725 - - path: truthseeker/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 4b92b154563b9c13bb5f177d0e106002 - size: 22192 - - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 2013309b971cea5728652df1a18ece16 - size: 21586 - plot@sms_spam: - cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/merged.csv -c - conf/plots.yaml - deps: - - path: sms_spam/plots/merged.csv - hash: md5 - md5: 3e3e63943b3d62dddc79e554cb691405 - size: 1492939 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio 
- - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: boxen - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - 
legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - path: sms_spam/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 695e96d374959cef893859230a15f1a7 - size: 24667 - - path: sms_spam/plots/metric_vs_predict_time.pdf - hash: md5 - md5: 857505ffce8416303759a76cb29b26a3 - size: 23552 - - path: sms_spam/plots/metric_vs_train_time.pdf - hash: md5 - md5: 98b34d861b84d36cb30f58c763445eb7 - size: 23637 - - path: sms_spam/plots/models_vs_accuracy.pdf - hash: md5 - md5: 3d9cda5e091398ec195ff1c763fb0b5a - size: 23033 - - path: sms_spam/plots/models_vs_predict_time.pdf - hash: md5 - md5: 06ae4883133a4f2bb4c19f531c693fdd - size: 19365 - - path: sms_spam/plots/models_vs_train_time.pdf - hash: md5 - md5: f8af33a8abf0caf4fc83a69b6af565a0 - size: 18945 - - path: sms_spam/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 43b4f4865931fca59079491745c20f1c - size: 22231 - - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 4f5b0a9ac3efe2e0daa225f79fe0e40c - size: 21606 - plot@ddos: - cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/merged.csv -c - conf/plots.yaml - deps: - - path: ddos/plots/merged.csv - hash: md5 - md5: 2fd123789b3c749a653aa9c142d23858 - size: 5465498 - params: - conf/plots.yaml: - cat_plot: - - file: symmetric_vs_metric.pdf - x: model.init.symmetric - y: accuracy - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Accuracy - legend_title: Metrics 
- hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - - file: symmetric_vs_metric_train_time.pdf - x: model.init.symmetric - y: train_time - hue: model.init.metric - errorbar: se - kind: bar - titles: - xlabels: '' - ylabels: Training Time (s) - legend_title: Metrics - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_accuracy.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: boxen - titles: - xlabels: Model - ylabels: Accuracy - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: linear - ylim: - - 0 - - 1 - rotation: 90 - - file: models_vs_train_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Training Time (s) - legend_title: Samples - rotation: 90 - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - - file: models_vs_predict_time.pdf - x: model_name - y: accuracy - hue: data.sample.train_size - errorbar: se - kind: bar - titles: - xlabels: Model - ylabels: Prediction Time (s) - legend_title: Samples - legend: - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - set: - yscale: log - rotation: 90 - line_plot: - - file: metric_vs_accuracy.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: accuracy - ylabel: Accuracy - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - 
Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_train_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: metric_vs_predict_time.pdf - hue: model.init.metric - title: - x: data.sample.train_size - xlabel: Number of Training Samples - y: predict_time - ylabel: Prediction Time (s) - y_scale: linear - hue_order: - - Gzip - - Pickle - - BZ2 - - Zstd - - Lzma - - Levenshtein - - Ratio - - Hamming - - Jaro - - Jaro-Winkler - - SeqRatio - errorbar: se - err_style: bars - xlim: - - 10 - - 500 - legend: - title: Metrics - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - path: ddos/plots/metric_vs_accuracy.pdf - hash: md5 - md5: 3b2f9c2885d331a0cadd339177318f3f - size: 24827 - - path: ddos/plots/metric_vs_predict_time.pdf - hash: md5 - md5: 56c78e45d5932c61b339753810a6fed1 - size: 24347 - - path: ddos/plots/metric_vs_train_time.pdf - hash: md5 - md5: 7ba195f1f39c450c7ebd9165eee97f32 - size: 22962 - - path: ddos/plots/models_vs_accuracy.pdf - hash: md5 - md5: 4e5e04199aa08c3098632cf8fad2c744 - size: 23780 - - path: ddos/plots/models_vs_predict_time.pdf - hash: md5 - md5: 41c0c84e0b3b737273692f10c366b275 - size: 19529 - - path: ddos/plots/models_vs_train_time.pdf - hash: md5 - md5: 38dd71a6ac8cd50294d5b81bffd8425b - size: 19106 - - path: ddos/plots/symmetric_vs_metric.pdf - hash: md5 - md5: 72331f97089e5465a2df8a071f6dcf10 - size: 22223 - - path: 
ddos/plots/symmetric_vs_metric_train_time.pdf - hash: md5 - md5: 3014b61ef7c5fe2e5276149ecd20625b - size: 22143 - condense@truthseeker-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.test_size=100 model_name=gzip_logistic model=best_gzip_logistic_truthseeker - hydra.sweeper.study_name=condense_gzip_logistic_truthseeker hydra.sweeper.n_trials=128 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/gzip_logistic/ - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/study.csv - model.init.m='tag(log, interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=truthseeker files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_logistic_truthseeker.yaml - hash: md5 - md5: 448e12c542f48c074057e9374743d61e - size: 326 - - path: truthseeker/logs/method/ - hash: md5 - md5: 6f6693db2bb9520dc7956f0d0c003e23.dir - size: 116543 - nfiles: 44 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? 
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: truthseeker/logs/condense/gzip_logistic/ - hash: md5 - md5: 79d74a0dfe0486ada3f03b24c68973dc.dir - size: 1576129 - nfiles: 513 - - path: truthseeker/reports/condense/gzip_logistic/ - hash: md5 - md5: 3de3011b1d96e4990111f5b1601e3b9d.dir - size: 400559 - nfiles: 343 - condense@ddos-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100 - model_name=gzip_knn model=best_gzip_knn_ddos hydra.sweeper.study_name=condense_gzip_knn_ddos - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_knn/ - hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/study.csv model.init.m='tag(log, - interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=ddos files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_knn_ddos.yaml - hash: md5 - md5: 74721f3e7ab6096e246c486d6080e1ab - size: 259 - - path: ddos/logs/method/ - hash: md5 - md5: 7128c67930147170f54fb89880528199.dir - size: 120518 - nfiles: 48 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: ddos/logs/condense/gzip_knn/ - hash: md5 - md5: a2dc5aef876897f53c4076e4012b678a.dir - size: 1542474 - nfiles: 513 - - path: ddos/reports/condense/gzip_knn/ - hash: md5 - md5: 781709e87f2e740f6a0f4e914ee9754f.dir - size: 340848 - nfiles: 379 - condense@ddos-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.test_size=100 - model_name=gzip_logistic model=best_gzip_logistic_ddos hydra.sweeper.study_name=condense_gzip_logistic_ddos - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/gzip_logistic/ - hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/study.csv model.init.m='tag(log, - interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=ddos files.reports=reports/condense/gzip_logistic/ hydra.launcher.n_jobs=16 - --config-name condense 
--multirun - deps: - - path: conf/model/best_gzip_logistic_ddos.yaml - hash: md5 - md5: 9507b28fa5a18b501fe9d80ec33bed1c - size: 334 - - path: ddos/logs/method/ - hash: md5 - md5: 7128c67930147170f54fb89880528199.dir - size: 120518 - nfiles: 48 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: ddos/logs/condense/gzip_logistic/ - hash: md5 - md5: 4f8f846516837f0e7cd63c8911aff99a.dir - size: 1623568 - nfiles: 513 - - path: ddos/reports/condense/gzip_logistic/ - hash: md5 - md5: 051b71717b4a7986a1965ebadf448838.dir - size: 350870 - nfiles: 384 - condense@kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.test_size=100 model_name=gzip_knn model=best_gzip_knn_kdd_nsl hydra.sweeper.study_name=condense_gzip_knn_kdd_nsl - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 
hydra.sweep.dir=kdd_nsl/logs/condense/gzip_knn/ - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/study.csv model.init.m='tag(log, - interval(.01, .1))' +model.init.sampling_method=medoid,sum,svc,random,hardness,nearmiss,knn - files.directory=kdd_nsl files.reports=reports/condense/gzip_knn/ hydra.launcher.n_jobs=16 - --config-name condense --multirun - deps: - - path: conf/model/best_gzip_knn_kdd_nsl.yaml - hash: md5 - md5: 2697918626643d0136286367b83ee6b9 - size: 258 - - path: kdd_nsl/logs/method/ - hash: md5 - md5: de8764bbb2daa13261f3f5d1dff27a30.dir - size: 79348 - nfiles: 28 - params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: ??? 
- _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 2 - direction: ${direction} - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: kdd_nsl/logs/condense/gzip_knn/ - hash: md5 - md5: 7d53f3534ceb486e6601d344562cfb32.dir - size: 1564530 - nfiles: 513 - - path: kdd_nsl/reports/condense/gzip_knn/ - hash: md5 - md5: 7e5a283215281be3ee4189ebd5a6e3f1.dir - size: 342924 - nfiles: 384 - parse_params: - cmd: python -m deckard.layers.parse - deps: - - path: conf/data/default.yaml - hash: md5 - md5: 86639d6672cfd9529dda3e2ae4036c01 - size: 22 - - path: conf/default.yaml - hash: md5 - md5: a0a533f84a7ffce197e0db5439219faf - size: 1504 - - path: conf/files/default.yaml - hash: md5 - md5: 7a2df5f8b98699376c3fb4da05d70dea - size: 306 - - path: conf/model/default.yaml - hash: md5 - md5: 39dc7512b1d19fea54550b080d880153 - size: 27 - - path: conf/scorers/default.yaml - hash: md5 - md5: d8d00e7d284ea68b1244743dfef8f00c - size: 280 - outs: - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - test_each_metric@gzip-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/gzip/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20 - ++raise_exception=True ' - deps: - - path: 
kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/gzip/20 - hash: md5 - md5: 6091388fcd68296e6ccd16f0955cba96.dir - size: 7683 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/gzip/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@zstd-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/zstd/20 - files.directory=kdd_nsl 
data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=zstd model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/zstd/20 - hash: md5 - md5: 704acd4e060b20b19dd8c6528ee42b02.dir - size: 7683 - nfiles: 4 - - path: 
kdd_nsl/reports/test_each_metric/gzip_knn/zstd/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@pkl-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/pkl/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=pkl model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: 
deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/pkl/20 - hash: md5 - md5: 539ec713f43133226c23d088f60a66bf.dir - size: 7668 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/pkl/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@bz2-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/bz2/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=bz2 model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - 
library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/bz2/20 - hash: md5 - md5: dc85f72896e274b978488f36ec121474.dir - size: 7668 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/bz2/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@lzma-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/lzma/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=lzma model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20 - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - 
test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/lzma/20 - hash: md5 - md5: 3e929ed47c2f62267a513fcc9ac7faec.dir - size: 7683 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/lzma/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@levenshtein-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/levenshtein/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=levenshtein model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20 ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - 
score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/levenshtein/20 - hash: md5 - md5: 6e719f5801c71fe88793e4a42fe47b68.dir - size: 7767 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/levenshtein/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@ratio-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/ratio/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=ratio model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20 ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - 
dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_compressor_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressor + ylabels: Accuracy + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_string_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric.pdf + x: Metric + y: accuracy + hue: 
Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Metrics + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: linear + - file: symmetric_vs_string_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: String Metrics + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_compressor_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: 
bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples 
+ y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - file: metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + - file: string_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/ratio/20 - hash: md5 - md5: c7917445640a277d2a898413a74442e3.dir - size: 7677 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/ratio/20/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@hamming-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/hamming/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=hamming model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20 
++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: sms_spam/plots/compressor_metric_vs_accuracy.pdf hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 5dffa574fee935f98ce74c5cd6058666 + size: 21187 + - path: sms_spam/plots/metric_vs_accuracy.pdf hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/hamming/20 + md5: b9f73f48c8c024650db938dd804cfb05 + size: 24114 + - path: sms_spam/plots/string_metric_vs_accuracy.pdf hash: md5 - md5: 384b5ae13749ca9006486a64dd50faf0.dir - size: 7707 - 
nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/hamming/20/score_dict.json + md5: 864db5ed7b357958078bdea3ba0bad42 + size: 20486 + - path: sms_spam/plots/symmetric_vs_compressor_metric.pdf hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_dataset@gzip_knn-kdd_nsl: - cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn - data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl - model_name=gzip_knn model=gzip_knn hydra.run.dir=kdd_nsl/logs/test_each_dataset/gzip_knn - ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json + md5: 501f5407e6906966dcb8b0c277d44dc3 + size: 21377 + - path: sms_spam/plots/symmetric_vs_metric.pdf hash: md5 - md5: 41e95614d524a857c0260b13ce77202b - size: 488 - - path: params.yaml + md5: 060ab65502a83ee367156e0414905962 + size: 31387 + - path: sms_spam/plots/symmetric_vs_metric_train_time.pdf hash: md5 - md5: 9a178db02b5ad8f990c7a557790a36c7 - size: 1381 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - 
library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_dataset/gzip_knn + md5: 18653a51a07e2fc5598620c2cf268fc8 + size: 31725 + - path: sms_spam/plots/symmetric_vs_string_metric.pdf hash: md5 - md5: 955370e62c64341f4410f3f46f6d84fd.dir - size: 7263 - nfiles: 4 - - path: kdd_nsl/reports/test_each_dataset/gzip_knn/score_dict.json + md5: fbbd49babe5bee5e8b16ac52bb01ffaa + size: 23669 + - path: sms_spam/plots/symmetric_vs_string_metric_train_time.pdf hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_dataset@gzip_knn-truthseeker: - cmd: 'python -m deckard.layers.optimise stage=test_each_dataset files.name=gzip_knn - data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker - model_name=gzip_knn model=gzip_knn hydra.run.dir=truthseeker/logs/test_each_dataset/gzip_knn - ++raise_exception=True ' + md5: 9b25b9f84afa0f43c3276b7e8f1866d3 + size: 24712 + plot_condense@sms_spam: + cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/condensed_merged.csv -c + conf/condensed_plots.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: conf/condensed_plots.yaml hash: md5 - md5: 41e95614d524a857c0260b13ce77202b - size: 488 - - path: params.yaml + md5: af17fa58e7c01bcbb396ab08de5b78d5 + size: 1915 + - path: sms_spam/plots/condensed_merged.csv hash: md5 - md5: 9a178db02b5ad8f990c7a557790a36c7 - size: 1381 + md5: aff0ab5439e406220d4c0c95d7032f71 + size: 4293513 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - 
test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: truthseeker/logs/test_each_dataset/gzip_knn - hash: md5 - md5: f8dd2e14f7e12daed6ebfd9a552d6c4e.dir - size: 7305 - nfiles: 4 - - path: truthseeker/reports/test_each_dataset/gzip_knn/score_dict.json - hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_dataset@ddos-gzip_knn: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 - files.directory=ddos data=ddos dataset=ddos model_name=gzip_knn model=gzip_knn - hydra.run.dir=ddos/logs/train/gzip_knn ++raise_exception=True ' + conf/condensed_plots.yaml: + cat_plot: + - file: condensing_method_vs_accuracy.pdf + digitize: Condensing Ratio + x: Condensing Method + hue: Condensing Ratio + y: accuracy + y_scale: linear + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid 
+ - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xlabels: Condensing Method + ylabels: Accuracy + legend_title: Sample Ratio + - file: condensing_method_vs_train_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: train_time + y_scale: log + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Training Time + legend_title: Sample Ratio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: condensing_method_vs_predict_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: predict_time + y_scale: log + col: Model + rotation: 45 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Prediction Time + legend_title: Sample Ratio + outs: + - path: sms_spam/plots/condensing_method_vs_accuracy.pdf + hash: md5 + md5: 367e877eaa1c765d35ab91cb242684ea + size: 77057 + - path: sms_spam/plots/condensing_method_vs_predict_time.pdf + hash: md5 + md5: d2376488f2a0c040274c3d2036733e00 + size: 79014 + - path: sms_spam/plots/condensing_method_vs_train_time.pdf + hash: md5 + md5: cc97909ea8a9d7df69647a6705d624b4 + size: 78699 + copy@sms_spam: + cmd: rm -rf ~/Gzip-KNN/figs/sms_spam/ && mkdir -p ~/Gzip-KNN/figs/sms_spam/ && + cp -r sms_spam/plots/* ~/Gzip-KNN/figs/sms_spam/ && rm -rf ~/Gzip-KNN/figs/sms_spam/.gitignore deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: sms_spam/plots/ hash: md5 - md5: 
81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: ee777ff721b32fb8529b6b3d4cf0241f.dir + size: 14711161 + nfiles: 29 + clean@kdd_nsl-condense/knn: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/knn.csv + -o kdd_nsl/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: kdd_nsl/reports/condense/knn.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 36a67671da89d39ab7d0c45296693749 + size: 2482710 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + 
ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: ddos/logs/train/gzip_knn + - path: kdd_nsl/plots/clean/condense/knn.csv hash: md5 - md5: 86973d6369f6a61b442f6387478ccde6.dir - size: 8041 - nfiles: 4 - - path: ddos/reports/train/gzip_knn/score_dict.json - hash: md5 - md5: 1269132e68fc8dff521df51cb2fe321c - size: 284 - test_each_dataset@ddos-gzip_svc: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 - files.directory=ddos data=ddos dataset=ddos model_name=gzip_svc model=gzip_svc - hydra.run.dir=ddos/logs/train/gzip_svc ++raise_exception=True ' + md5: 7faf7190b1f806dbc3eb6477cedc7ee5 + size: 1507783 + clean@kdd_nsl-condense/logistic: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/logistic.csv + -o kdd_nsl/plots/clean/condense/logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: kdd_nsl/reports/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 1325ef7a8bebf6d77e0793ce344e95cc + size: 2886969 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: 
deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: 
Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: ddos/logs/train/gzip_svc + - path: kdd_nsl/plots/clean/condense/logistic.csv hash: md5 - md5: 67d472318cba51a8f9e7989991cbf09e.dir - size: 8038 - nfiles: 4 - - path: ddos/reports/train/gzip_svc/score_dict.json - hash: md5 - md5: 5728b15f67d338a4bf8160b60715dce8 - size: 283 - test_each_dataset@ddos-gzip_logistic: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic - data.sample.train_size=100 files.directory=ddos data=ddos dataset=ddos model_name=gzip_logistic - model=gzip_logistic hydra.run.dir=ddos/logs/train/gzip_logistic ++raise_exception=True ' + md5: 8baf78c24cf0a48103fe3f5c3b7ea340 + size: 2014871 + clean@kdd_nsl-condense/svc: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/svc.csv + -o kdd_nsl/plots/clean/condense/svc.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: kdd_nsl/reports/condense/svc.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: d825a5d325742621f7cfaf2849ddf79f + size: 2731160 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - 
random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: ddos/logs/train/gzip_logistic + - path: kdd_nsl/plots/clean/condense/svc.csv hash: md5 - md5: 24fe0f4f52e6989c5a1c65795ea0d936.dir - size: 8173 - nfiles: 4 - - path: ddos/reports/train/gzip_logistic/score_dict.json - hash: md5 - md5: 259b4ae57c0c1e8d08b72f7f888fbe45 - size: 281 - test_each_dataset@truthseeker-gzip_knn: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 - files.directory=truthseeker data=truthseeker dataset=truthseeker 
model_name=gzip_knn - model=gzip_knn hydra.run.dir=truthseeker/logs/train/gzip_knn ++raise_exception=True ' + md5: c0b256435cf12d7637b92514bf852c4c + size: 2007338 + merge_condense@kdd_nsl: + cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + kdd_nsl/plots/ --output_file condensed_merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: kdd_nsl/plots/clean/condense/knn.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 7faf7190b1f806dbc3eb6477cedc7ee5 + size: 1507783 + - path: kdd_nsl/plots/clean/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: 
sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + md5: 8baf78c24cf0a48103fe3f5c3b7ea340 + size: 2014871 + - path: kdd_nsl/plots/clean/condense/svc.csv + hash: md5 + md5: c0b256435cf12d7637b92514bf852c4c + size: 2007338 outs: - - path: truthseeker/logs/train/gzip_knn + - path: kdd_nsl/plots/condensed_merged.csv hash: md5 - md5: ba3eb31317c073b3b07a9c9d1948e656.dir - size: 8158 - nfiles: 4 - - path: truthseeker/reports/train/gzip_knn/score_dict.json - hash: md5 - md5: 2088612d107192d0497e9fd2c569818f - size: 283 - test_each_dataset@truthseeker-gzip_svc: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 - files.directory=truthseeker data=truthseeker dataset=truthseeker model_name=gzip_svc - model=gzip_svc hydra.run.dir=truthseeker/logs/train/gzip_svc ++raise_exception=True ' + md5: 3ce3f32f881b93574c5e475e5617847e + size: 5582885 + clean@kdd_nsl-gzip_knn: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_knn.csv -o kdd_nsl/plots/clean/gzip_knn.csv + -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: kdd_nsl/reports/gzip_knn.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 2e569940af77f7280eaa067077d75b0b + size: 1286094 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: 
score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: truthseeker/logs/train/gzip_svc + - path: kdd_nsl/plots/clean/gzip_knn.csv hash: md5 - md5: 4512bda479ab6cd5ae74e7f575928b9d.dir - size: 8154 - nfiles: 4 - - path: truthseeker/reports/train/gzip_svc/score_dict.json - hash: md5 - md5: 25d8ec2a07497188e4311c5d62f9ddb6 - size: 281 - 
test_each_dataset@truthseeker-gzip_logistic: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic - data.sample.train_size=100 files.directory=truthseeker data=truthseeker dataset=truthseeker - model_name=gzip_logistic model=gzip_logistic hydra.run.dir=truthseeker/logs/train/gzip_logistic - ++raise_exception=True ' + md5: 24f521894702af73c82fd3b8b8ff27b1 + size: 715749 + clean@kdd_nsl-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_logistic.csv + -o kdd_nsl/plots/clean/gzip_logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: kdd_nsl/reports/gzip_logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: a5d9359b42a7d7b06cdc0d9438bfa836 + size: 1406330 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: 
deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: truthseeker/logs/train/gzip_logistic + - path: kdd_nsl/plots/clean/gzip_logistic.csv hash: md5 - md5: e1da0260d3c55bfbf4a44bb1b96206ba.dir - size: 8315 - nfiles: 4 - - path: truthseeker/reports/train/gzip_logistic/score_dict.json - hash: md5 - md5: 9ba0565e8f7dcb14a1e45b8e585d9ccb - size: 283 - test_each_dataset@sms_spam-gzip_knn: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_knn - model=gzip_knn hydra.run.dir=sms_spam/logs/train/gzip_knn ++raise_exception=True ' + md5: 2847de576a49e63aae2ae02937d39ce4 + size: 1056239 + clean@kdd_nsl-gzip_svc: + cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/gzip_svc.csv -o kdd_nsl/plots/clean/gzip_svc.csv + -c conf/clean.yaml deps: - - path: 
kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: kdd_nsl/reports/gzip_svc.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: db5b11d405596dfa38b7592ad89e4e4a + size: 1407185 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic 
+ GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: sms_spam/logs/train/gzip_knn + - path: kdd_nsl/plots/clean/gzip_svc.csv hash: md5 - md5: 2066e09b41a2f6ce0c835018278b0dc6.dir - size: 8093 - nfiles: 4 - - path: sms_spam/reports/train/gzip_knn/score_dict.json - hash: md5 - md5: 45ab656d14366622402a687082c5feeb - size: 284 - test_each_dataset@sms_spam-gzip_svc: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 - files.directory=sms_spam data=sms_spam dataset=sms_spam model_name=gzip_svc - model=gzip_svc hydra.run.dir=sms_spam/logs/train/gzip_svc ++raise_exception=True ' + md5: 9438c5a8752b7c4224ba94b8ee98dee5 + size: 1156562 + merge@kdd_nsl: + cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file + clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder kdd_nsl/plots --output_file + merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: kdd_nsl/plots/clean/gzip_knn.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 24f521894702af73c82fd3b8b8ff27b1 + size: 715749 + - path: kdd_nsl/plots/clean/gzip_logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 
- target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + md5: 2847de576a49e63aae2ae02937d39ce4 + size: 1056239 + - path: kdd_nsl/plots/clean/gzip_svc.csv + hash: md5 + md5: 9438c5a8752b7c4224ba94b8ee98dee5 + size: 1156562 outs: - - path: sms_spam/logs/train/gzip_svc + - path: kdd_nsl/plots/merged.csv hash: md5 - md5: 4f8d2f14bf8ed23f7443b91640fbb2c0.dir - size: 8090 - nfiles: 4 - - path: sms_spam/reports/train/gzip_svc/score_dict.json - hash: md5 - md5: 6cf7317e720631b93bcd699b22a9c4ec - size: 283 - test_each_dataset@sms_spam-gzip_logistic: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic - data.sample.train_size=100 files.directory=sms_spam data=sms_spam dataset=sms_spam - model_name=gzip_logistic model=gzip_logistic hydra.run.dir=sms_spam/logs/train/gzip_logistic - ++raise_exception=True ' + md5: e9aaa44e6ef176c174b296c31a6760f9 + size: 2956133 + plot@kdd_nsl: + cmd: python -m deckard.layers.plots 
--path kdd_nsl/plots/ --file kdd_nsl/plots/merged.csv -c + conf/plots.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: conf/plots.yaml hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 43e3ec0876b55c83f231615f7a904e33 + size: 7386 + - path: kdd_nsl/plots/merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: e9aaa44e6ef176c174b296c31a6760f9 + size: 2956133 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_compressor_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressor + ylabels: 
Accuracy + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_string_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Metrics + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: linear + - file: symmetric_vs_string_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: String Metrics + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + 
loc: center left + prop: + size: 14 + - file: symmetric_vs_compressor_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - 
Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - file: metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + - file: string_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + 
bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 outs: - - path: sms_spam/logs/train/gzip_logistic + - path: kdd_nsl/plots/compressor_metric_vs_accuracy.pdf hash: md5 - md5: e9577cb3ce87a9e0a55da46017111e2a.dir - size: 8225 - nfiles: 4 - - path: sms_spam/reports/train/gzip_logistic/score_dict.json - hash: md5 - md5: 8c39b120c89ed2d1c51c88d99f202ab1 - size: 281 - test_each_dataset@kdd_nsl-gzip_knn: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_knn data.sample.train_size=100 - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_knn model=gzip_knn - hydra.run.dir=kdd_nsl/logs/train/gzip_knn ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json + md5: c489db933d8ba20b46f2c660a0a3047a + size: 21218 + - path: kdd_nsl/plots/metric_vs_accuracy.pdf hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 7a142e5701cc21160fda0863069f047d + size: 24512 + - path: kdd_nsl/plots/string_metric_vs_accuracy.pdf hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - 
distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/train/gzip_knn + md5: 887d2ab7003eaf8f7802f4283dfc7fef + size: 20482 + - path: kdd_nsl/plots/symmetric_vs_compressor_metric.pdf hash: md5 - md5: d9f95ac89efb51e0b9474a50ed1ee34d.dir - size: 8108 - nfiles: 4 - - path: kdd_nsl/reports/train/gzip_knn/score_dict.json - hash: md5 - md5: 1bb23417615a5663b20ae3c9bb05ab41 - size: 284 - test_each_dataset@kdd_nsl-gzip_svc: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_svc data.sample.train_size=100 - files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl model_name=gzip_svc model=gzip_svc - hydra.run.dir=kdd_nsl/logs/train/gzip_svc ++raise_exception=True ' - deps: - - path: kdd_nsl/reports/train/default/score_dict.json + md5: 3a7c06d30bdcbca9f6a07d638868fbba + size: 21400 + - path: kdd_nsl/plots/symmetric_vs_metric.pdf hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: c6702ce379c3f136e12dc9ea9026388a + size: 31309 + - path: kdd_nsl/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: 96cbbe31be92230fb5fa87cc8c4e439f + size: 32172 + - path: kdd_nsl/plots/symmetric_vs_string_metric.pdf + hash: md5 + md5: cc66d61cd5b6709b480d5040eca3dd6a + size: 22907 + - path: kdd_nsl/plots/symmetric_vs_string_metric_train_time.pdf + hash: md5 + md5: 2a87a16ab34be554a1c5cba1a00f5ff8 + size: 25045 + clean@ddos-gzip_knn: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_knn.csv -o ddos/plots/clean/gzip_knn.csv + -c conf/clean.yaml + deps: + - path: ddos/reports/gzip_knn.csv hash: md5 
- md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 300b372df1c4be34b85f4080667329a1 + size: 1537512 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: 
Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/train/gzip_svc + - path: ddos/plots/clean/gzip_knn.csv hash: md5 - md5: 8efe1af9a07fe35bf35a620aecc9984e.dir - size: 8105 - nfiles: 4 - - path: kdd_nsl/reports/train/gzip_svc/score_dict.json - hash: md5 - md5: 6e851ecef3c53745a566ce54bc9b64e3 - size: 283 - test_each_dataset@kdd_nsl-gzip_logistic: - cmd: 'python -m deckard.layers.optimise stage=train files.name=gzip_logistic - data.sample.train_size=100 files.directory=kdd_nsl data=kdd_nsl dataset=kdd_nsl - model_name=gzip_logistic model=gzip_logistic hydra.run.dir=kdd_nsl/logs/train/gzip_logistic - ++raise_exception=True ' + md5: 4dcfbd9357af1a17978265cd5cf7b389 + size: 1231290 + clean@ddos-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_logistic.csv -o + ddos/plots/clean/gzip_logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: ddos/reports/gzip_logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 476499955f6c0b8f796c2d8274ad108d + size: 1387052 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - 
reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/train/gzip_logistic - hash: md5 - md5: b3b1f1813a6bc3b51b1aca53b3730892.dir - size: 8240 - nfiles: 4 - - path: kdd_nsl/reports/train/gzip_logistic/score_dict.json + - path: ddos/plots/clean/gzip_logistic.csv hash: md5 - md5: 
ce2f45436d570475e2cd62b1d5417305 - size: 281 - test_each_metric@jaro-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=jaro model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20 - ++raise_exception=True ' + md5: 10f4e37f4dc1bf7874461430c547a9c8 + size: 929254 + clean@ddos-gzip_svc: + cmd: python -m deckard.layers.clean_data -i ddos/reports/gzip_svc.csv -o ddos/plots/clean/gzip_svc.csv + -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: ddos/reports/gzip_svc.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: d85b5ddf9fab15d76641603c4d774a79 + size: 1376765 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: 
gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro/20 + - path: ddos/plots/clean/gzip_svc.csv hash: md5 - md5: 8b71ff09c44e615322095f861b3f1dca.dir - size: 7662 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/jaro/20/score_dict.json + md5: 39e10d3afe8e5a6a008300166abf64b6 + size: 1111620 + merge@ddos: + cmd: python merge.py --big_dir ddos/plots/ --data_file clean/gzip_knn.csv --little_dir_data_file + clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder ddos/plots --output_file + merged.csv + deps: + - path: ddos/plots/clean/gzip_knn.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@jaro_winkler-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/jaro_winkler/20 - files.directory=kdd_nsl 
data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=jaro_winkler model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20 ++raise_exception=True ' + md5: 4dcfbd9357af1a17978265cd5cf7b389 + size: 1231290 + - path: ddos/plots/clean/gzip_logistic.csv + hash: md5 + md5: 10f4e37f4dc1bf7874461430c547a9c8 + size: 929254 + - path: ddos/plots/clean/gzip_svc.csv + hash: md5 + md5: 39e10d3afe8e5a6a008300166abf64b6 + size: 1111620 + outs: + - path: ddos/plots/merged.csv + hash: md5 + md5: ddd7e1f8412a6a8d397888033a755ad2 + size: 3305983 + clean@truthseeker-gzip_knn: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_knn.csv + -o truthseeker/plots/clean/gzip_knn.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: truthseeker/reports/gzip_knn.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 2298733dbbc1d3a699eeaedaee005a91 + size: 1246208 + params: + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + outs: + - path: truthseeker/plots/clean/gzip_knn.csv + hash: 
md5 + md5: 1f8dbb1f89957121ca5f935f2c6503bd + size: 691191 + clean@truthseeker-gzip_logistic: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_logistic.csv + -o truthseeker/plots/clean/gzip_logistic.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/gzip_logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 6ed79959e5c663c55217dcf02ed58cc9 + size: 1351631 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + 
gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/jaro_winkler/20 + - path: truthseeker/plots/clean/gzip_logistic.csv hash: md5 - md5: 2b831c44b315a8b61c3f762b365c8e5f.dir - size: 7782 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/jaro_winkler/20/score_dict.json + md5: e06aa9e97e30f80c615606ecd610195c + size: 952678 + clean@truthseeker-gzip_svc: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/gzip_svc.csv + -o truthseeker/plots/clean/gzip_svc.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/gzip_svc.csv + hash: md5 + md5: e7567275d1f0e7952c116b6533d43c2d + size: 1366409 + params: + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + 
replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + outs: + - path: truthseeker/plots/clean/gzip_svc.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_metric@seqratio-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_metric files.name=gzip_knn/seqratio/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=seqratio model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20 ++raise_exception=True ' + md5: 39120e9e457e55ab86298d192b7b8d51 + size: 1112569 + merge@truthseeker: + cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/gzip_knn.csv + --little_dir_data_file clean/gzip_logistic.csv clean/gzip_svc.csv --output_folder + truthseeker/plots --output_file merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: truthseeker/plots/clean/gzip_knn.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: 1f8dbb1f89957121ca5f935f2c6503bd + size: 691191 + - path: truthseeker/plots/clean/gzip_logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: 
deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_metric/gzip_knn/seqratio/20 + md5: e06aa9e97e30f80c615606ecd610195c + size: 952678 + - path: truthseeker/plots/clean/gzip_svc.csv hash: md5 - md5: ed632f40ed8ff016cb649ab00c408114.dir - size: 7722 - nfiles: 4 - - path: kdd_nsl/reports/test_each_metric/gzip_knn/seqratio/20/score_dict.json + md5: 39120e9e457e55ab86298d192b7b8d51 + size: 1112569 + outs: + - path: truthseeker/plots/merged.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_model@gzip-gzip_knn-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_knn/gzip/20 - ++raise_exception=True ' + md5: a6294ee4d1fc5b445dbf585745dfb18e + size: 2783534 + merge_datasets: + cmd: python merge.py --big_dir . --little_dir . 
--data_file sms_spam/plots/merged.csv + --little_dir_data_file kdd_nsl/plots/merged.csv ddos/plots/merged.csv truthseeker/plots/merged.csv + kdd_nsl/plots/condensed_merged.csv ddos/plots/condensed_merged.csv truthseeker/plots/condensed_merged.csv + sms_spam/plots/condensed_merged.csv --output_folder combined/plots/ --output_file + merged.csv deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: ddos/plots/merged.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: ddd7e1f8412a6a8d397888033a755ad2 + size: 3305983 + - path: kdd_nsl/plots/merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - 
name: sklearn.metrics.log_loss - outs: - - path: kdd_nsl/logs/test_each_model/gzip_knn/gzip/20 + md5: e9aaa44e6ef176c174b296c31a6760f9 + size: 2956133 + - path: sms_spam/plots/merged.csv hash: md5 - md5: c8075fa1867cb00a11f6df654086bd97.dir - size: 7675 - nfiles: 4 - - path: kdd_nsl/reports/test_each_model/gzip_knn/gzip/20/score_dict.json + md5: 4baf51fdcc220aedc6443147a057559e + size: 2765074 + - path: truthseeker/plots/merged.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_model@gzip-gzip_svc-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 - ++raise_exception=True ' + md5: a6294ee4d1fc5b445dbf585745dfb18e + size: 2783534 + outs: + - path: combined/plots/merged.csv + hash: md5 + md5: a7ca9f759ab63a1649889ad57e928578 + size: 33289497 + clean@ddos-condense/svc: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/svc.csv -o + ddos/plots/clean/condense/svc.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json + - path: ddos/reports/condense/svc.csv hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + md5: f7fa9ef13258b1cc8e4dee82f395cabc + size: 2853089 + params: + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: 
Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + outs: + - path: ddos/plots/clean/condense/svc.csv + hash: md5 + md5: a016c3958a5bedbce540628908c94082 + size: 2336402 + clean@truthseeker-condense/svc: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/svc.csv + -o truthseeker/plots/clean/condense/svc.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/condense/svc.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 789d469a26448549761aa6140fd4bc7d + size: 2260420 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: 
deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 - hash: md5 - md5: 6ec9663f42d781dc482f1da6df886312.dir - size: 7678 - nfiles: 4 - - path: kdd_nsl/reports/test_each_model/gzip_svc/gzip/20/score_dict.json + - path: truthseeker/plots/clean/condense/svc.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - test_each_model@gzip-gzip_logistic-kdd_nsl-20: - cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20 - files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl - model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1 - hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True ' + md5: 5217ab37267115a9f3a887dda0ca9716 + size: 1837203 + clean@truthseeker-condense/logistic: + cmd: python -m deckard.layers.clean_data -i 
truthseeker/reports/condense/logistic.csv + -o truthseeker/plots/clean/condense/logistic.csv -c conf/clean.yaml deps: - - path: kdd_nsl/reports/train/default/score_dict.json - hash: md5 - md5: 81a03f1290fe4d5eaa739ba9807b5b20 - size: 488 - - path: params.yaml + - path: truthseeker/reports/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: d7854b284f4668d9b5706002ede597cd + size: 1461329 params: - params.yaml: - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - dataset: kdd_nsl - device_id: cpu - files: - _target_: deckard.base.files.FileConfig - data_dir: data - data_type: .csv - directory: kdd_nsl - model_dir: model - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json - model: - _target_: deckard.base.model.Model - data: - _target_: deckard.base.data.Data - name: raw_data/kdd_nsl_undersampled_5000.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - target: label - init: - _target_: deckard.base.model.ModelInitializer - distance_matrix: kdd_nsl/model/gzip/100-100/0.npz - k: 1 - m: -1 - metric: gzip - name: gzip_classifier.GzipKNN - symmetric: false - library: sklearn - model_name: gzip_knn - scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + 
hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 + - path: truthseeker/plots/clean/condense/logistic.csv hash: md5 - md5: 8ba9f7659cef2c4d610fece176de1548.dir - size: 7767 - nfiles: 4 - - path: kdd_nsl/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json + md5: 2834667122a045b2815d6d8669d13855 + size: 1195763 + clean@truthseeker-condense/knn: + cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/knn.csv + -o truthseeker/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: truthseeker/reports/condense/knn.csv hash: md5 - md5: 5d8bf090bc8e34df8ed01766adfca5eb - size: 26 - grid_search@20-kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 09ff6b9152372998f2cc0cf9e5b10a52 + size: 2364296 + params: + conf/clean.yaml: + drop_values: + 
accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + outs: + - path: truthseeker/plots/clean/condense/knn.csv + hash: md5 + md5: bb4310ab3db56fef5287c968e923a946 + size: 1416979 + plot@truthseeker: + cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/merged.csv -c + conf/plots.yaml deps: - - path: conf/gzip_knn.yaml + - path: conf/plots.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 - - path: params.yaml + md5: 43e3ec0876b55c83f231615f7a904e33 + size: 7386 + - path: truthseeker/plots/merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: a6294ee4d1fc5b445dbf585745dfb18e + size: 2783534 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_knn + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_compressor_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressor + ylabels: Accuracy + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_string_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: 
Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Metrics + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: linear + - file: symmetric_vs_string_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: String Metrics + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_compressor_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + 
legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear 
+ hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - file: metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + - file: string_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 outs: - - path: kdd_nsl/logs/gzip_knn/20 + - path: truthseeker/plots/compressor_metric_vs_accuracy.pdf hash: md5 - md5: 5c03e3e52e7a24e15acbd0b2aadfee35.dir - size: 1389089 - nfiles: 514 - - path: kdd_nsl/reports/gzip_knn/20/train/ + md5: fe9b34fc5c7bdb52f8092be432715ad6 + size: 19529 + - path: 
truthseeker/plots/metric_vs_accuracy.pdf hash: md5 - md5: a7e0e97547bfac97d8518259bffdd4c1.dir - size: 1847622 - nfiles: 1661 - grid_search@20-kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20 - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 2a49ccd20406d6d58692f241855c3d08 + size: 22804 + - path: truthseeker/plots/string_metric_vs_accuracy.pdf + hash: md5 + md5: 9ae3cf88045c9556d26df2d79d493e35 + size: 20944 + - path: truthseeker/plots/symmetric_vs_compressor_metric.pdf + hash: md5 + md5: e1765300eb083de79d90786e3ca35374 + size: 21369 + - path: truthseeker/plots/symmetric_vs_metric.pdf + hash: md5 + md5: 00178f8d5d5644099848f066d44d5316 + size: 31272 + - path: truthseeker/plots/symmetric_vs_metric_train_time.pdf + hash: md5 + md5: ae31c23accfaa8696452aceae673db53 + size: 32498 + - path: truthseeker/plots/symmetric_vs_string_metric.pdf + hash: md5 + md5: 79fd5831809a53057c775ef1c52e089a + size: 23079 + - path: truthseeker/plots/symmetric_vs_string_metric_train_time.pdf + hash: md5 + md5: 35f27e898700bb9b4e941a1c6fc06273 + size: 24558 + plot@ddos: + cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/merged.csv -c + conf/plots.yaml deps: - - path: conf/gzip_logistic.yaml + - path: conf/plots.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 - - path: params.yaml + md5: 43e3ec0876b55c83f231615f7a904e33 + size: 7386 + - path: ddos/plots/merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 
ddd7e1f8412a6a8d397888033a755ad2 + size: 3305983 params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic + conf/plots.yaml: + cat_plot: + - file: symmetric_vs_compressor_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressor + ylabels: Accuracy + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + 
hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_string_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric.pdf + x: Metric + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: ' ' + xlabels: Compressors + ylabels: Accuracy + legend_title: ' ' + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: symmetric_vs_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Metrics + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: linear + - file: symmetric_vs_string_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: String Metrics + order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: 
symmetric_vs_compressor_metric_train_time.pdf + x: Metric + y: train_time + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Compressors + ylabels: Training Time (s) + legend_title: Metrics + order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + hue_order: + - Asymmetric + - Symmetric + rotation: 90 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: 
bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: string_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: Training Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - file: metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + - file: string_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time + ylabel: Prediction Time (s) + y_scale: linear + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + legend: + title: Metrics + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + 
size: 14 outs: - - path: kdd_nsl/logs/gzip_logistic/20 - hash: md5 - md5: e7528ce71bad9f745a9f5e4fcf3a2df1.dir - size: 1571121 - nfiles: 514 - - path: kdd_nsl/reports/gzip_logistic/20/train/ - hash: md5 - md5: 127796b95b1817c4b0d9f1846537b0a6.dir - size: 2083086 - nfiles: 1772 - grid_search@20-kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun - deps: - - path: conf/gzip_svc.yaml - hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 - - path: params.yaml + - path: ddos/plots/compressor_metric_vs_accuracy.pdf hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc - outs: - - path: kdd_nsl/logs/gzip_svc/20 + md5: 4e9ec7bc40de0eb9686c80001471c633 + size: 21223 + - path: ddos/plots/metric_vs_accuracy.pdf hash: md5 - md5: a1cb35a26808d09dac04aef8fc7106cb.dir - size: 1524012 - nfiles: 514 - - path: kdd_nsl/reports/gzip_svc/20/train/ + md5: 55f65e038473f751761c89450273e99f + size: 24492 + - path: ddos/plots/string_metric_vs_accuracy.pdf hash: md5 - md5: f475c4428240afaaf863bb021eb82890.dir - size: 2095726 - nfiles: 2092 - grid_search@20-truthseeker-gzip_knn: - cmd: python -m 
deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv - files.directory=truthseeker files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun - deps: - - path: conf/gzip_knn.yaml + md5: 080a9ad5352a1c8a4ea0742d8fa2064d + size: 21341 + - path: ddos/plots/symmetric_vs_compressor_metric.pdf hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 - - path: params.yaml + md5: 7868ca14c1c3b8cff7377e570b3cd1fd + size: 21164 + - path: ddos/plots/symmetric_vs_metric.pdf hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_knn - outs: - - path: truthseeker/logs/gzip_knn/20 + md5: 3a1fdd75ec075371e20a43f6fceb5865 + size: 31323 + - path: ddos/plots/symmetric_vs_metric_train_time.pdf hash: md5 - md5: 21da241789a9856418302895c146cd4d.dir - size: 1370161 - nfiles: 514 - - path: truthseeker/reports/gzip_knn/20/train/ + md5: 2d477f3dae3b1985f0f06b4b50e47b6d + size: 32595 + - path: ddos/plots/symmetric_vs_string_metric.pdf hash: md5 - md5: 394a7d8c033166c958996d646f822460.dir - size: 376291 - nfiles: 340 - grid_search@20-truthseeker-gzip_logistic: - cmd: python -m deckard.layers.optimise 
stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20 - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv - files.directory=truthseeker files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: c1d20c58447ed0ce378586a0a741cd2f + size: 23231 + - path: ddos/plots/symmetric_vs_string_metric_train_time.pdf + hash: md5 + md5: 96008fa9732748ceca2292daa7b10d5c + size: 25192 + merge_condense@truthseeker: + cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + truthseeker/plots/ --output_file condensed_merged.csv deps: - - path: conf/gzip_logistic.yaml + - path: truthseeker/plots/clean/condense/knn.csv hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 - - path: params.yaml + md5: bb4310ab3db56fef5287c968e923a946 + size: 1416979 + - path: truthseeker/plots/clean/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic + md5: 2834667122a045b2815d6d8669d13855 + size: 1195763 + - path: truthseeker/plots/clean/condense/svc.csv + hash: md5 + md5: 5217ab37267115a9f3a887dda0ca9716 + size: 1837203 outs: - - path: truthseeker/logs/gzip_logistic/20 + - path: truthseeker/plots/condensed_merged.csv hash: md5 - md5: 4eceda9fdfa787e48b4a2d397ad89332.dir - size: 1497002 - nfiles: 514 - - path: truthseeker/reports/gzip_logistic/20/train/ + md5: 
fc78969e3c4df404d5954d906de1e2fe + size: 4494580 + plot_condense@truthseeker: + cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/condensed_merged.csv -c + conf/condensed_plots.yaml + deps: + - path: conf/condensed_plots.yaml hash: md5 - md5: 9b32f4ef152eda3a3f2e68d424d163d2.dir - size: 555897 - nfiles: 366 - grid_search@20-truthseeker-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv - files.directory=truthseeker files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: af17fa58e7c01bcbb396ab08de5b78d5 + size: 1915 + - path: truthseeker/plots/condensed_merged.csv + hash: md5 + md5: fc78969e3c4df404d5954d906de1e2fe + size: 4494580 + params: + conf/condensed_plots.yaml: + cat_plot: + - file: condensing_method_vs_accuracy.pdf + digitize: Condensing Ratio + x: Condensing Method + hue: Condensing Ratio + y: accuracy + y_scale: linear + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xlabels: Condensing Method + ylabels: Accuracy + legend_title: Sample Ratio + - file: condensing_method_vs_train_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: train_time + y_scale: log + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + 
- Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Training Time + legend_title: Sample Ratio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: condensing_method_vs_predict_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: predict_time + y_scale: log + col: Model + rotation: 45 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Prediction Time + legend_title: Sample Ratio + outs: + - path: truthseeker/plots/condensing_method_vs_accuracy.pdf + hash: md5 + md5: 43daa962adc5b178b1ecd1ce631f7a82 + size: 79151 + - path: truthseeker/plots/condensing_method_vs_predict_time.pdf + hash: md5 + md5: 8052368bafdaa94f3135e094f68bd55c + size: 76155 + - path: truthseeker/plots/condensing_method_vs_train_time.pdf + hash: md5 + md5: 5a88008752dd280bc73cee793026b594 + size: 75513 + copy@truthseeker: + cmd: rm -rf ~/Gzip-KNN/figs/truthseeker/ && mkdir -p ~/Gzip-KNN/figs/truthseeker/ + && cp -r truthseeker/plots/* ~/Gzip-KNN/figs/truthseeker/ && rm -rf ~/Gzip-KNN/figs/truthseeker/.gitignore deps: - - path: conf/gzip_svc.yaml + - path: truthseeker/plots/ hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 - - path: params.yaml + md5: fed82eba40c5f980d2ecc49dcd0bd732.dir + size: 15135833 + nfiles: 29 + clean@ddos-condense/knn: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/knn.csv -o + ddos/plots/clean/condense/knn.csv -c conf/clean.yaml + deps: + - path: ddos/reports/condense/knn.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 1bd44b90db430d5d5785537fe732b2a6 + size: 2816581 params: - conf/gzip_svc.yaml: - hydra: - run: - dir: 
${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + 
random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: truthseeker/logs/gzip_svc/20 - hash: md5 - md5: 20a01b45b6f1901a8e929bf1cbccd349.dir - size: 1473672 - nfiles: 514 - - path: truthseeker/reports/gzip_svc/20/train/ + - path: ddos/plots/clean/condense/knn.csv hash: md5 - md5: a2b059debfa307134c83ec03713e8a50.dir - size: 546743 - nfiles: 384 - grid_search@20-sms_spam-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv - files.directory=sms_spam files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 3a1acbf38f64695356c6b052547800f7 + size: 2246228 + clean@ddos-condense/logistic: + cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/logistic.csv + -o ddos/plots/clean/condense/logistic.csv -c conf/clean.yaml deps: - - path: conf/gzip_knn.yaml - hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 - - path: params.yaml + - path: ddos/reports/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 83a34019f32c069c16172b171a602a26 + size: 2848813 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_knn + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: + true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + 
knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model outs: - - path: sms_spam/logs/gzip_knn/20 - hash: md5 - md5: bcee56ea959096e8255fb482a8854457.dir - size: 1381168 - nfiles: 514 - - path: sms_spam/reports/gzip_knn/20/train/ + - path: ddos/plots/clean/condense/logistic.csv hash: md5 - md5: 12133daeda911e75210cff4d8a3fa5a7.dir - size: 379524 - nfiles: 326 - grid_search@20-sms_spam-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20 - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv - files.directory=sms_spam files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 37106f4477460267406dd80d90987cac + size: 2287660 + merge_condense@ddos: + cmd: python merge.py --big_dir ddos/plots/ --data_file clean/condense/knn.csv + --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder + ddos/plots/ --output_file condensed_merged.csv deps: - - path: conf/gzip_logistic.yaml + - path: ddos/plots/clean/condense/knn.csv hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 - - path: params.yaml + md5: 3a1acbf38f64695356c6b052547800f7 + size: 2246228 + - path: ddos/plots/clean/condense/logistic.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic - outs: - - path: sms_spam/logs/gzip_logistic/20 + md5: 37106f4477460267406dd80d90987cac + size: 2287660 + - path: ddos/plots/clean/condense/svc.csv hash: md5 - md5: 5c7265a3ac4bf4774fbb1c440b9910c4.dir - size: 1520121 - nfiles: 514 - - path: sms_spam/reports/gzip_logistic/20/train/ + md5: a016c3958a5bedbce540628908c94082 + size: 2336402 + outs: + - path: ddos/plots/condensed_merged.csv hash: md5 - md5: 9ae8109f623b19dcbabe51e4401a1f8c.dir - 
size: 552539 - nfiles: 357 - grid_search@20-sms_spam-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv - files.directory=sms_spam files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: a509ca15f5da44a1c7fd5fa86541824a + size: 6939926 + plot_condense@ddos: + cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/condensed_merged.csv -c + conf/condensed_plots.yaml deps: - - path: conf/gzip_svc.yaml + - path: conf/condensed_plots.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 - - path: params.yaml + md5: af17fa58e7c01bcbb396ab08de5b78d5 + size: 1915 + - path: ddos/plots/condensed_merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: a509ca15f5da44a1c7fd5fa86541824a + size: 6939926 params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc - outs: - - path: sms_spam/logs/gzip_svc/20 - hash: md5 - md5: fe6324545be6dc97b88326e10a65e815.dir - size: 1451676 - nfiles: 514 - - path: sms_spam/reports/gzip_svc/20/train/ - hash: md5 - md5: 814632194dc03d626a24f0418fd703e1.dir - size: 542357 - nfiles: 384 - grid_search@20-ddos-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 - data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_ddos 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_knn/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv - files.directory=ddos files.reports=reports/gzip_knn/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + conf/condensed_plots.yaml: + cat_plot: + - file: condensing_method_vs_accuracy.pdf + digitize: Condensing Ratio + x: Condensing Method + hue: Condensing Ratio + y: accuracy + y_scale: linear + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xlabels: Condensing Method + ylabels: Accuracy + legend_title: Sample Ratio + - file: condensing_method_vs_train_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: train_time + y_scale: log + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Training Time + legend_title: Sample Ratio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: condensing_method_vs_predict_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: predict_time + y_scale: log + col: Model + rotation: 45 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Prediction Time + legend_title: Sample Ratio + outs: + - path: ddos/plots/condensing_method_vs_accuracy.pdf + 
hash: md5 + md5: 799f438072661472c3581b7783187e27 + size: 95036 + - path: ddos/plots/condensing_method_vs_predict_time.pdf + hash: md5 + md5: e9d99a4d20977d908bc6125b4d3ec64c + size: 92611 + - path: ddos/plots/condensing_method_vs_train_time.pdf + hash: md5 + md5: 38d50e2531e75b0ed7e25f99fe3a020a + size: 92297 + plot_condense@kdd_nsl: + cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/condensed_merged.csv -c + conf/condensed_plots.yaml deps: - - path: conf/gzip_knn.yaml + - path: conf/condensed_plots.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 - - path: params.yaml + md5: af17fa58e7c01bcbb396ab08de5b78d5 + size: 1915 + - path: kdd_nsl/plots/condensed_merged.csv hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 3ce3f32f881b93574c5e475e5617847e + size: 5582885 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - 
model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_knn + conf/condensed_plots.yaml: + cat_plot: + - file: condensing_method_vs_accuracy.pdf + digitize: Condensing Ratio + x: Condensing Method + hue: Condensing Ratio + y: accuracy + y_scale: linear + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xlabels: Condensing Method + ylabels: Accuracy + legend_title: Sample Ratio + - file: condensing_method_vs_train_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: train_time + y_scale: log + kind: boxen + col: Model + rotation: 45 + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Training Time + legend_title: Sample Ratio + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + - file: condensing_method_vs_predict_time.pdf + x: Condensing Method + hue: Condensing Ratio + digitize: Condensing Ratio + y: predict_time + y_scale: log + col: Model + rotation: 45 + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + kind: boxen + order: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - KNN + xticklabels: + - Random + - Medoid + - Sum + - SVC + - Hardness + - NearMiss + - k-NN + xlabels: Condensing Method + ylabels: Prediction Time + legend_title: Sample 
Ratio + outs: + - path: kdd_nsl/plots/condensing_method_vs_accuracy.pdf + hash: md5 + md5: 02804fa85242e8873e257703d36292b3 + size: 93543 + - path: kdd_nsl/plots/condensing_method_vs_predict_time.pdf + hash: md5 + md5: a19ac9d498ba7a48818804efd89cc7ac + size: 89049 + - path: kdd_nsl/plots/condensing_method_vs_train_time.pdf + hash: md5 + md5: 0b856f827819de35d07371b6801edf04 + size: 88882 + plot_merged: + cmd: python -m deckard.layers.plots --path combined/plots/ --file combined/plots/merged.csv -c + conf/merged_plots.yaml + deps: + - path: combined/plots/merged.csv + hash: md5 + md5: a7ca9f759ab63a1649889ad57e928578 + size: 33289497 + - path: conf/merged_plots.yaml + hash: md5 + md5: 07cbd496003579ae0a5dc56bf03dc1a5 + size: 8296 + params: + conf/merged_plots.yaml: + cat_plot: + - file: models_vs_accuracy.pdf + x: Model + y: accuracy + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: Accuracy + legend_title: Samples + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + col: Dataset + order: + - k-KNN + - k-SVC + - k-Logistic + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: models_vs_train_time.pdf + x: Model + y: train_time + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: $t_t$ (s) + legend_title: Samples + rotation: 90 + col: Dataset + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: log + order: + - k-KNN + - k-SVC + - k-Logistic + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: models_vs_predict_time.pdf + x: Model + y: predict_time_per_sample + hue: data.sample.train_size + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: $t_i$ (s) + legend_title: Samples + col: Dataset + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + y_scale: log + order: + - k-KNN + - k-SVC + - k-Logistic + 
col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + - file: symmetric_models_vs_accuracy.pdf + row: Model + x: data.sample.train_size + y: accuracy + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: Samples + ylabels: Accuracy + legend_title: ' ' + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + col: Dataset + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: symmetric_models_vs_train_time.pdf + row: Model + x: data.sample.train_size + y: train_time_per_sample + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: $t_t$ (s) + legend_title: ' ' + rotation: 90 + col: Dataset + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + y_scale: log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: symmetric_models_vs_predict_time.pdf + x: data.sample.train_size + row: Model + y: predict_time_per_sample + hue: Symmetric + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: $t_i$ (s) + legend_title: ' ' + col: Dataset + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + y_scale: log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + row_order: + - k-KNN + - k-SVC + - k-Logistic + - file: condensing_methods_vs_accuracy.pdf + x: Model + y: accuracy + hue: Condensing Method + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: Accuracy + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + col: Dataset + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - k-SVC + - k-Logistic + legend_title: Condensing Method + - file: condensing_methods_vs_train_time.pdf + x: Model + y: train_time + hue: Condensing Method + errorbar: se + kind: boxen + titles: + 
xlabels: ' ' + ylabels: $t_t$ (s) + legend_title: Condensing Method + rotation: 90 + col: Dataset + y_scale: log + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - k-SVC + - k-Logistic + - file: condensing_methods_vs_predict_time.pdf + x: Model + y: predict_time_per_sample + hue: Condensing Method + errorbar: se + kind: boxen + titles: + xlabels: ' ' + ylabels: $t_i$ (s) + legend_title: Condensing Method + col: Dataset + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 14 + rotation: 90 + y_scale: log + col_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + order: + - k-KNN + - k-SVC + - k-Logistic + line_plot: + - file: compressor_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + - file: string_metric_vs_accuracy.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: accuracy + ylabel: Accuracy + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + - file: string_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: $t_t$ (s) + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 
500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + y_scale: log + - file: compressor_metric_vs_train_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: train_time + ylabel: $t_t$ (s) + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + y_scale: log + - file: string_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time_per_sample + ylabel: $t_i$ (s) + hue_order: + - Levenshtein + - Ratio + - Hamming + - Jaro + - Jaro-Winkler + - SeqRatio + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + y_scale: log + - file: compressor_metric_vs_predict_time.pdf + hue: Metric + title: + x: data.sample.train_size + xlabel: Number of Training Samples + y: predict_time_per_sample + ylabel: $t_i$ (s) + hue_order: + - GZIP + - Pickle + - BZ2 + - ZSTD + - LZMA + errorbar: se + err_style: bars + xlim: + - 10 + - 500 + style: Dataset + style_order: + - DDoS + - SMS Spam + - KDD NSL + - Truthseeker + legend: + bbox_to_anchor: + - 1.05 + - 0.5 + loc: center left + prop: + size: 12 + y_scale: log outs: - - path: ddos/logs/gzip_knn/20 + - path: combined/plots/compressor_metric_vs_accuracy.pdf hash: md5 - md5: 057fc9613b2210a0dd1e03ef46f3d6bc.dir - size: 1616211 - nfiles: 514 - - path: ddos/reports/gzip_knn/20/train/ + md5: 48aea5d713cb4eac12301c89d815af62 + size: 23029 + - path: combined/plots/compressor_metric_vs_predict_time.pdf hash: md5 - md5: 
b0ae22713c6a319a24acb69525a9f01a.dir - size: 1375974 - nfiles: 1536 - grid_search@20-ddos-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 - data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_logistic/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv - files.directory=ddos files.reports=reports/gzip_logistic/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 7d690d0d9381702841645a8cc47c4228 + size: 23691 + - path: combined/plots/compressor_metric_vs_train_time.pdf + hash: md5 + md5: 7684f9f2d3fd807f5ca0791947a4f495 + size: 23217 + - path: combined/plots/condensing_methods_vs_accuracy.pdf + hash: md5 + md5: ee93a76c66f25ab3f33d04e66dbc6c89 + size: 61419 + - path: combined/plots/condensing_methods_vs_predict_time.pdf + hash: md5 + md5: c4d4d6309ccb922f0896c0682ebc62bb + size: 75130 + - path: combined/plots/condensing_methods_vs_train_time.pdf + hash: md5 + md5: 5630caa9d7cd712e9eade1e3f1f989ce + size: 74744 + - path: combined/plots/models_vs_accuracy.pdf + hash: md5 + md5: 89fbf635c37ad049a9d7581c819232fb + size: 44138 + - path: combined/plots/models_vs_predict_time.pdf + hash: md5 + md5: 7426493cc2eea4a3c795774dca34c3d7 + size: 52991 + - path: combined/plots/models_vs_train_time.pdf + hash: md5 + md5: 8e94cfaf2d29f7900c5a79b728d22a3d + size: 52701 + - path: combined/plots/string_metric_vs_accuracy.pdf + hash: md5 + md5: 5da7b7e5fd2f428af3936550d29149ea + size: 24176 + - path: combined/plots/string_metric_vs_predict_time.pdf + hash: md5 + md5: ca75801d85720c0bab65447ab9310868 + size: 24398 + - path: combined/plots/string_metric_vs_train_time.pdf + hash: md5 + md5: 9053fd4d1b86e8a6453c7862b2b7483a + size: 24458 + - path: combined/plots/symmetric_models_vs_accuracy.pdf 
+ hash: md5 + md5: 14906a8e21db525a46910f6cc9776b37 + size: 64101 + - path: combined/plots/symmetric_models_vs_predict_time.pdf + hash: md5 + md5: 20bbaa2bd5fb395b8d579246d0364937 + size: 80822 + - path: combined/plots/symmetric_models_vs_train_time.pdf + hash: md5 + md5: b38a529d8bfd5dd25d8ffb4b57859225 + size: 81185 + copy@combined: + cmd: rm -rf ~/Gzip-KNN/figs/combined/ && mkdir -p ~/Gzip-KNN/figs/combined/ && + cp -r combined/plots/* ~/Gzip-KNN/figs/combined/ && rm -rf ~/Gzip-KNN/figs/combined/.gitignore deps: - - path: conf/gzip_logistic.yaml + - path: combined/plots/ hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 - - path: params.yaml + md5: fad9d0d19a575c84c55daa1cbd67b514.dir + size: 34019697 + nfiles: 16 + copy@ddos: + cmd: rm -rf ~/Gzip-KNN/figs/ddos/ && mkdir -p ~/Gzip-KNN/figs/ddos/ && cp -r ddos/plots/* + ~/Gzip-KNN/figs/ddos/ && rm -rf ~/Gzip-KNN/figs/ddos/.gitignore + deps: + - path: ddos/plots/ + hash: md5 + md5: 377bb3bca5774b42a32ad343d074462d.dir + size: 21089165 + nfiles: 29 + copy@kdd_nsl: + cmd: rm -rf ~/Gzip-KNN/figs/kdd_nsl/ && mkdir -p ~/Gzip-KNN/figs/kdd_nsl/ && cp + -r kdd_nsl/plots/* ~/Gzip-KNN/figs/kdd_nsl/ && rm -rf ~/Gzip-KNN/figs/kdd_nsl/.gitignore + deps: + - path: kdd_nsl/plots/ + hash: md5 + md5: dc76f478efb0cbc46246b1ee240687fe.dir + size: 17691329 + nfiles: 29 + clean_merged: + cmd: python -m deckard.layers.clean_data -i combined/plots/merged.csv -o combined/plots/clean_merged.csv + -c conf/clean.yaml + deps: + - path: combined/plots/merged.csv + hash: md5 + md5: 14b7b6d947a96066ff2ad028680511d5 + size: 33462041 + - path: conf/clean.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 3fdcad8f5751398ace2b94aaa74e4e18 + size: 1023 params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic - outs: - - path: ddos/logs/gzip_logistic/20 + conf/clean.yaml: + drop_values: + accuracy: 0.0 + predict_time: 1.0 + replace: + model.init.metric: + jaro: Jaro + _winkler: -Winkler + levenshtein: Levenshtein + ncd: NCD + ratio: Ratio + seqRatio: SeqRatio + hamming: Hamming + gzip: GZIP + pkl: Pickle + bz2: BZ2 + zstd: ZSTD + lzma: LZMA + model_name: + GzipSVC: k-SVC + GzipLogisticRegressor: k-Logistic + GzipKNN: k-KNN + model.init.symmetric: 
+ true: Symmetric + false: Asymmetric + model.init.sampling_method: + random: Random + medoid: Medoid + sum: Sum + svc: SVC + hardness: Hardness + nearmiss: NearMiss + knn: KNN + dataset: + ddos: DDoS + sms_spam: SMS Spam + kdd_nsl: KDD NSL + truthseeker: Truthseeker + model.init.m: + -1: 1 + replace_cols: + dataset: Dataset + model.init.metric: Metric + model.init.symmetric: Symmetric + model.init.sampling_method: Condensing Method + model.init.m: Condensing Ratio + model_name: Model + outs: + - path: combined/plots/clean_merged.csv + hash: md5 + md5: c156f464018e66193d396f270be55786 + size: 33579589 + data: + cmd: python data_prep.py + deps: + - path: data_prep.py hash: md5 - md5: f2c036dc149976bc0de5187f8661669d.dir - size: 1705246 - nfiles: 514 - - path: ddos/reports/gzip_logistic/20/train/ + md5: 18244c921ed2d7cbf25b8362b3ca33aa + size: 5146 + outs: + - path: raw_data/ hash: md5 - md5: 36eee9b3fb432eafed577ca45b477dab.dir - size: 1608552 - nfiles: 1349 - grid_search@20-ddos-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 - data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_svc/20 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv - files.directory=ddos files.reports=reports/gzip_svc/20 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 33d46673e0631bef98be9e8991ed1ed1.dir + size: 50328647 + nfiles: 8 + parse_params: + cmd: python -m deckard.layers.parse deps: - - path: conf/gzip_svc.yaml + - path: conf/data/default.yaml + hash: md5 + md5: 86639d6672cfd9529dda3e2ae4036c01 + size: 22 + - path: conf/default.yaml + hash: md5 + md5: a0a533f84a7ffce197e0db5439219faf + size: 1504 + - path: conf/files/default.yaml + hash: md5 + md5: 7a2df5f8b98699376c3fb4da05d70dea + size: 306 + - path: 
conf/model/default.yaml + hash: md5 + md5: 39dc7512b1d19fea54550b080d880153 + size: 27 + - path: conf/scorers/default.yaml + hash: md5 + md5: d8d00e7d284ea68b1244743dfef8f00c + size: 280 + outs: + - path: params.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + train: + cmd: python -m deckard.layers.experiment train + deps: - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + - path: raw_data/ + hash: md5 + md5: 33d46673e0631bef98be9e8991ed1ed1.dir + size: 50328647 + nfiles: 8 params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss + outs: + - path: kdd_nsl/reports/train/default/predictions.json + hash: md5 + md5: 986d2f0abe9b96253b196a222a550609 + size: 702 + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: 492e1219d803759a686caa2859c91d21 + size: 485 + test_each_model@gzip-gzip_logistic-sms_spam-20: + cmd: 'python -m deckard.layers.optimise 
stage=test_each_model files.name=gzip_logistic/gzip/20 + files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam + model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1 + hydra.run.dir=sms_spam/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True ' + deps: + - path: kdd_nsl/reports/train/default/score_dict.json + hash: md5 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 + - path: params.yaml + hash: md5 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: ddos/logs/gzip_svc/20 + - path: 
sms_spam/logs/test_each_model/gzip_logistic/gzip/20 hash: md5 - md5: 5934a7b63c96844a0eaa9ecea06a79c2.dir - size: 1639820 - nfiles: 514 - - path: ddos/reports/gzip_svc/20/train/ + md5: d121a07eb6c0e96c7cd18fe1f2d0fbd6.dir + size: 7950 + nfiles: 4 + - path: sms_spam/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json hash: md5 - md5: 0e902831c38cc7b2f2b03d7bb7f4f5cf.dir - size: 1580188 - nfiles: 1536 - grid_search@100-kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_knn-ddos-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20 + files.directory=ddos data=ddos data.sample.train_size=20 dataset=ddos model=gzip_knn + model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=ddos/logs/test_each_model/gzip_knn/gzip/20 + ++raise_exception=True ' deps: - - path: conf/gzip_knn.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: 
deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: kdd_nsl/logs/gzip_knn/100 + - path: ddos/logs/test_each_model/gzip_knn/gzip/20 hash: md5 - md5: aa2209bce9b2f829ca22f244b53ed58f.dir - size: 1416182 - nfiles: 514 - - path: kdd_nsl/reports/gzip_knn/100/train/ + md5: 3a4d1598b93a5a00ffd486b26a568475.dir + size: 7826 + nfiles: 4 + - path: ddos/reports/test_each_model/gzip_knn/gzip/20/score_dict.json hash: md5 - md5: 1547fa66fbaac37a7badef9b300577a7.dir - size: 1163933 - nfiles: 1000 - grid_search@100-kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100 - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_svc-sms_spam-20: + cmd: 'python -m deckard.layers.optimise 
stage=test_each_model files.name=gzip_svc/gzip/20 + files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam + model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=sms_spam/logs/test_each_model/gzip_svc/gzip/20 + ++raise_exception=True ' deps: - - path: conf/gzip_logistic.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - 
max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: kdd_nsl/logs/gzip_logistic/100 + - path: sms_spam/logs/test_each_model/gzip_svc/gzip/20 hash: md5 - md5: b6e7cf1d3984f8029177576f9668944b.dir - size: 1609157 - nfiles: 514 - - path: kdd_nsl/reports/gzip_logistic/100/train/ + md5: ac59a56d56834986ab013ff5cb6b4448.dir + size: 7861 + nfiles: 4 
+ - path: sms_spam/reports/test_each_model/gzip_svc/gzip/20/score_dict.json hash: md5 - md5: d40db4814c403a903c7d0cd2a8a5bb7b.dir - size: 1329546 - nfiles: 1093 - grid_search@100-kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_knn-sms_spam-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20 + files.directory=sms_spam data=sms_spam data.sample.train_size=20 dataset=sms_spam + model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=sms_spam/logs/test_each_model/gzip_knn/gzip/20 + ++raise_exception=True ' deps: - - path: conf/gzip_svc.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: 
reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: kdd_nsl/logs/gzip_svc/100 + - path: sms_spam/logs/test_each_model/gzip_knn/gzip/20 hash: md5 - md5: 4b96e2a3bb0e0d230ebd96591a16e441.dir - size: 1553624 - nfiles: 514 - - path: kdd_nsl/reports/gzip_svc/100/train/ + md5: 4eaee5c6d9a4ad7d474938026f330e8c.dir + size: 7858 + nfiles: 4 + - path: sms_spam/reports/test_each_model/gzip_knn/gzip/20/score_dict.json hash: md5 - md5: 3cf8a86de1026ead8fcd1b6cda47e910.dir - size: 1247698 - nfiles: 1152 - grid_search@100-truthseeker-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv - files.directory=truthseeker files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + 
test_each_model@gzip-gzip_svc-truthseeker-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20 + files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker + model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=truthseeker/logs/test_each_model/gzip_svc/gzip/20 + ++raise_exception=True ' deps: - - path: conf/gzip_knn.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - 
_target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: truthseeker/logs/gzip_knn/100 - hash: md5 - md5: 818cba0a8349442987e5d6be1f0672d4.dir - size: 1374869 - nfiles: 514 - - path: truthseeker/reports/gzip_knn/100/train/ - hash: md5 - md5: 261a37d5d497bd477d872aa72a94a13f.dir - size: 394446 - nfiles: 320 - grid_search@100-truthseeker-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train 
data=truthseeker dataset=truthseeker - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100 - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv - files.directory=truthseeker files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun - deps: - - path: conf/gzip_logistic.yaml - hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_logistic.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: 
${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic - outs: - - path: truthseeker/logs/gzip_logistic/100 + - path: truthseeker/logs/test_each_model/gzip_svc/gzip/20 hash: md5 - md5: dd822b92438871be421644a82afa8e2f.dir - size: 1528739 - nfiles: 514 - - path: truthseeker/reports/gzip_logistic/100/train/ + md5: 5fb0774e1c5387d988a28d68900d7d02.dir + size: 7924 + nfiles: 4 + - path: truthseeker/reports/test_each_model/gzip_svc/gzip/20/score_dict.json hash: md5 - md5: d1b22149466a949b86aba9390d7cf992.dir - size: 556386 - nfiles: 365 - grid_search@100-truthseeker-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv - files.directory=truthseeker files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_logistic-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1 + hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True ' deps: - - path: conf/gzip_svc.yaml + - 
path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: 
raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: truthseeker/logs/gzip_svc/100 + - path: kdd_nsl/logs/test_each_model/gzip_logistic/gzip/20 hash: md5 - md5: c9493ae71545ccec0ea01adc6d664bce.dir - size: 1505603 - nfiles: 514 - - path: truthseeker/reports/gzip_svc/100/train/ + md5: ec6c44a8421f7cb02994bafbb0ceb59d.dir + size: 7980 + nfiles: 4 + - path: kdd_nsl/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json hash: md5 - md5: c9a4bae4aed04fcdb578f44fba94af87.dir - size: 547282 - nfiles: 384 - grid_search@100-sms_spam-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - 
hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv - files.directory=sms_spam files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_logistic-truthseeker-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_logistic/gzip/20 + files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker + model=gzip_logistic model_name=gzip_knn model.init.metric=gzip model.init.m=-1 + hydra.run.dir=truthseeker/logs/test_each_model/gzip_logistic/gzip/20 ++raise_exception=True ' deps: - - path: conf/gzip_knn.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: 
deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: sms_spam/logs/gzip_knn/100 - hash: md5 - md5: ad8714bbbce96d2c1ff75deda0add5ec.dir - size: 1415136 - nfiles: 514 - - path: sms_spam/reports/gzip_knn/100/train/ - hash: md5 - md5: 6bcf048da228e84a757916c797891044.dir - size: 376546 - nfiles: 331 - find_best_model@ddos-gzip_knn: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_knn_ddos --config_subdir model --params_file best_gzip_knn_ddos --default_config - gzip_knn - deps: - - path: ddos/logs/gzip_knn/ - hash: md5 - md5: d2c6441e85e3509b8968240a48196d07.dir - size: 4193267 - nfiles: 1542 - outs: - - path: conf/model/best_gzip_knn_ddos.yaml - hash: md5 - md5: bdea475d3a2bc59106f27dccd0fc27fc - size: 419 - find_best_model@ddos-gzip_svc: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_svc_ddos --config_subdir model --params_file best_gzip_svc_ddos --default_config - gzip_svc - deps: - - path: ddos/logs/gzip_svc/ - hash: md5 - md5: 78cd23f301a93a7c9842abb061e3cc7b.dir - size: 7447727 - nfiles: 2570 - outs: - - path: conf/model/best_gzip_svc_ddos.yaml - hash: md5 - md5: 3a7f27dd470ec9e55c10403814f550f2 - size: 442 - 
find_best_model@ddos-gzip_logistic: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_logistic_ddos --config_subdir model --params_file best_gzip_logistic_ddos - --default_config gzip_logistic - deps: - - path: ddos/logs/gzip_logistic/ + - path: truthseeker/logs/test_each_model/gzip_logistic/gzip/20 hash: md5 - md5: b28cadbd10b9bbe40802e39b1beaee18.dir - size: 6561328 - nfiles: 2056 - outs: - - path: conf/model/best_gzip_logistic_ddos.yaml + md5: 2ade09315cc26a4d65dbc22a657bfdec.dir + size: 8013 + nfiles: 4 + - path: truthseeker/reports/test_each_model/gzip_logistic/gzip/20/score_dict.json hash: md5 - md5: d5e603d6386dd6cf1167088eaecbdde5 - size: 498 - condense@ddos-knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 - data.sample.test_size=100 model_name=condensed_knn model=gzip_knn files.directory=ddos - files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_ddos - hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/knn/ - hydra.callbacks.study_dump.output_file=ddos/logs/knn/study.csv hydra.launcher.n_jobs=-1 - --config-name condense_knn --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_svc-kdd_nsl-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_svc/gzip/20 + files.directory=kdd_nsl data=kdd_nsl data.sample.train_size=20 dataset=kdd_nsl + model=gzip_svc model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 + ++raise_exception=True ' deps: - - path: conf/condense_knn.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: abd25d17a742e467d39dda34b448ba88 - size: 2181 + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 
params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - direction: ${direction} - max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r + params.yaml: + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: 
deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: ddos/logs/condense/knn/ + - path: kdd_nsl/logs/test_each_model/gzip_svc/gzip/20 hash: md5 - md5: 34f8b7196af71d106965513050a254fb.dir - size: 10910937 - nfiles: 4097 - - path: ddos/reports/condense/knn/ + md5: 80e1fe29c22203d01027107088979db9.dir + size: 7891 + nfiles: 4 + - path: kdd_nsl/reports/test_each_model/gzip_svc/gzip/20/score_dict.json hash: md5 - md5: 9b6918814be3bea732abc71b8684fd8d.dir - size: 8458502 - nfiles: 9157 - condense@ddos-svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 - data.sample.test_size=100 model_name=condensed_svc model=gzip_svc files.directory=ddos - files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_ddos - hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/svc/ - hydra.callbacks.study_dump.output_file=ddos/logs/svc/study.csv hydra.launcher.n_jobs=-1 - --config-name condense_svc --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + test_each_model@gzip-gzip_knn-truthseeker-20: + cmd: 'python -m deckard.layers.optimise stage=test_each_model files.name=gzip_knn/gzip/20 + files.directory=truthseeker data=truthseeker data.sample.train_size=20 dataset=truthseeker + 
model=gzip_knn model_name=gzip_knn model.init.metric=gzip model.init.m=-1 hydra.run.dir=truthseeker/logs/test_each_model/gzip_knn/gzip/20 + ++raise_exception=True ' deps: - - path: conf/model/best_gzip_svc_ddos.yaml + - path: kdd_nsl/reports/train/default/score_dict.json hash: md5 - md5: 3a7f27dd470ec9e55c10403814f550f2 - size: 442 - - path: ddos/logs/method/ + md5: ee4344da4a735fb0b6e6d2cf83ddef6e + size: 484 + - path: params.yaml hash: md5 - md5: a09dd0467b0e8a142d6f32a38f205159.dir - size: 59399 - nfiles: 28 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: - hydra: - run: - dir: ${dataset}/logs/condense/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 - direction: ${direction} - max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r + params.yaml: + data: + _target_: deckard.base.data.Data + drop: 
+ - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + dataset: kdd_nsl + device_id: cpu + files: + _target_: deckard.base.files.FileConfig + data_dir: data + data_type: .csv + directory: kdd_nsl + model_dir: model + name: default + params_file: params.yaml + predictions_file: predictions.json + reports: reports + score_dict_file: score_dict.json + model: + _target_: deckard.base.model.Model + data: + _target_: deckard.base.data.Data + drop: + - id + name: raw_data/kdd_nsl_undersampled_5000.csv + sample: + _target_: deckard.base.data.SklearnDataSampler + random_state: 0 + stratify: true + test_size: 100 + train_size: 100 + target: label + init: + _target_: deckard.base.model.ModelInitializer + distance_matrix: kdd_nsl/model/gzip/100-100/0.npz + k: 1 + m: -1 + metric: gzip + name: gzip_classifier.GzipKNN + symmetric: false + library: sklearn + model_name: gzip_knn + scorers: + _target_: deckard.base.scorer.ScorerDict + accuracy: + _target_: deckard.base.scorer.ScorerConfig + direction: maximize + name: sklearn.metrics.accuracy_score + log_loss: + _target_: deckard.base.scorer.ScorerConfig + direction: minimize + name: sklearn.metrics.log_loss outs: - - path: ddos/logs/condense/svc/ + - path: truthseeker/logs/test_each_model/gzip_knn/gzip/20 hash: md5 - md5: 6a15cfc205c7382b8d7d6d67d35ddfb0.dir - size: 11072739 - nfiles: 4097 - - path: ddos/reports/condense/svc/ + md5: e1b4842686f73992f04e9104eab3e88f.dir + size: 7921 + nfiles: 4 + - path: truthseeker/reports/test_each_model/gzip_knn/gzip/20/score_dict.json hash: md5 - md5: daaf428c939e9bfcc233bf88ee39f9fb.dir - size: 2819182 - nfiles: 3072 - condense@ddos-logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 - data.sample.test_size=100 model_name=condensed_logistic model=gzip_logistic - files.directory=ddos 
files.reports=reports/condense/logistic/ hydra.sweeper.study_name=condense_logistic_ddos - hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/condense/logistic/ - hydra.callbacks.study_dump.output_file=ddos/logs/logistic/study.csv hydra.launcher.n_jobs=-1 - --config-name condense_logistic --multirun + md5: 5d8bf090bc8e34df8ed01766adfca5eb + size: 26 + grid_search@20-ddos-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/20/symmetry_true hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv + files.directory=ddos files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/model/best_gzip_logistic_ddos.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: d5e603d6386dd6cf1167088eaecbdde5 - size: 498 - - path: ddos/logs/method/ + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml hash: md5 - md5: a09dd0467b0e8a142d6f32a38f205159.dir - size: 59399 - nfiles: 28 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_knn.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
subdir: ${hydra.job.num} @@ -15530,26 +5610,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -15562,32 +5642,34 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: ddos/logs/condense/logistic/ + - path: ddos/logs/gzip_knn/20/symmetry_true hash: md5 - md5: 064e5768d0155635c9bc6287914ac9f7.dir - size: 11690343 - nfiles: 4097 - - path: ddos/reports/condense/logistic/ + md5: 75a67061f3d261f90a32e2e342a26049.dir + size: 1201059 + nfiles: 513 + - path: ddos/reports/gzip_knn/20/symmetry_true/train/ hash: md5 - md5: 7ce841278929a90690417685b7c7f143.dir - size: 5929815 - nfiles: 5888 - grid_search@100-ddos-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + md5: 410d4dc9dc529c85056cea27da5fc34f.dir + size: 328616 + nfiles: 369 + grid_search@20-ddos-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 
data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_knn/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv - files.directory=ddos files.reports=reports/gzip_knn/100 hydra.launcher.n_jobs=-1 + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/20/symmetry_false + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/20/study.csv files.directory=ddos + files.reports=reports/gzip_knn/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -15607,30 +5689,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - 
model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -15642,34 +5720,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: ddos/logs/gzip_knn/100 + - path: ddos/logs/gzip_knn/20/symmetry_false hash: md5 - md5: 41af522bae6f35684d51a90652c37082.dir - size: 1645388 - nfiles: 514 - - path: ddos/reports/gzip_knn/100/train/ + md5: 5511994182145eb3145fd3afc672d1a5.dir + size: 1200638 + nfiles: 513 + - path: ddos/reports/gzip_knn/20/symmetry_false/train/ hash: md5 - md5: b9374a5acb2480c2ed6a35803a344f69.dir - size: 1341749 - nfiles: 1499 - grid_search@100-ddos-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + md5: b507e62340bddb44dd3e66467a23444a.dir + size: 328838 + nfiles: 369 + grid_search@20-ddos-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_logistic/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv - files.directory=ddos files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 + model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/20/symmetry_true + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/20/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + 
model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_logistic --multirun deps: - path: conf/gzip_logistic.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_logistic.yaml: hydra: @@ -15689,31 +5769,27 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.fit_intercept: True,False +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -15727,36 +5803,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: ddos/logs/gzip_logistic/100 + - path: ddos/logs/gzip_logistic/20/symmetry_true hash: md5 - md5: 3f1d14c70e73f668316f86a8d7d0e22b.dir - size: 1733688 - nfiles: 514 - - path: 
ddos/reports/gzip_logistic/100/train/ + md5: 7411fc1827bfc3df75c9106a4288ee8d.dir + size: 1262132 + nfiles: 513 + - path: ddos/reports/gzip_logistic/20/symmetry_true/train/ hash: md5 - md5: c839c1faf70de47c057714c3a8bdc52d.dir - size: 1562420 - nfiles: 1315 - grid_search@100-ddos-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 - data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv - files.directory=ddos files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 72358a4a9191f8e02e2d9348e7bfa5be.dir + size: 601313 + nfiles: 356 + grid_search@20-ddos-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/20/symmetry_false + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/20/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/20/symmetry_false hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_svc.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 
486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -15768,37 +5846,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy + directions: ${direction} + metric_names: ${optimizers} output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -15812,159 +5886,77 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc - outs: - - path: ddos/logs/gzip_svc/100 - hash: md5 - md5: 4adc8c896e06e2d7e8450f2b863b95bf.dir - size: 1681042 - nfiles: 514 - - path: ddos/reports/gzip_svc/100/train/ - hash: md5 - md5: 
8ad9bbb8a118699458753528a263f5ba.dir - size: 1790102 - nfiles: 1678 - find_best_model@kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_knn_kdd_nsl --config_subdir model --params_file best_gzip_knn_kdd_nsl --default_config - gzip_knn - deps: - - path: kdd_nsl/logs/gzip_knn/ - hash: md5 - md5: 6418750af32f15be9c6f35e0975b3276.dir - size: 4024441 - nfiles: 1542 outs: - - path: conf/model/best_gzip_knn_kdd_nsl.yaml - hash: md5 - md5: f9ad25a19931041146b4b1eab45fda68 - size: 420 - find_best_model@kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_svc_kdd_nsl --config_subdir model --params_file best_gzip_svc_kdd_nsl --default_config - gzip_svc - deps: - - path: kdd_nsl/logs/gzip_svc/ - hash: md5 - md5: 381879c377b6eeccbb9d1aa42f78fec2.dir - size: 4366326 - nfiles: 1542 - outs: - - path: conf/model/best_gzip_svc_kdd_nsl.yaml - hash: md5 - md5: 0542c20ce7b5a74a20d4ab1c38fdf213 - size: 434 - find_best_model@kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_logistic_kdd_nsl --config_subdir model --params_file best_gzip_logistic_kdd_nsl - --default_config gzip_logistic - deps: - - path: kdd_nsl/logs/gzip_logistic/ + - path: ddos/logs/gzip_logistic/20/symmetry_false hash: md5 - md5: 34325e24d16a4af0ec3286ec4b034e14.dir - size: 4504884 - nfiles: 1542 - outs: - - path: conf/model/best_gzip_logistic_kdd_nsl.yaml - hash: md5 - md5: e21d828b4b1ad122d7755e986de5b93d - size: 353 - find_best_model@sms_spam-gzip_knn: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_knn_sms_spam --config_subdir model --params_file best_gzip_knn_sms_spam - --default_config gzip_knn - deps: - - path: sms_spam/logs/gzip_knn/ - hash: md5 - md5: 689c69db8c621101649ddef5bd0c1bb5.dir - size: 2713750 - nfiles: 1028 - outs: - - path: conf/model/best_gzip_knn_sms_spam.yaml - hash: md5 - 
md5: 41fad710bcb8b8b8dd548d669b2ed748 - size: 419 - find_best_model@sms_spam-gzip_svc: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_svc_sms_spam --config_subdir model --params_file best_gzip_svc_sms_spam - --default_config gzip_svc - deps: - - path: sms_spam/logs/gzip_svc/ + md5: 49dbe43b3f37ddc7ac2ae83c9022067e.dir + size: 1243003 + nfiles: 513 + - path: ddos/reports/gzip_logistic/20/symmetry_false/train/ hash: md5 - md5: b91e15f0eb5ee57aed8aeb5a5d6feeab.dir - size: 2777710 - nfiles: 1028 - outs: - - path: conf/model/best_gzip_svc_sms_spam.yaml - hash: md5 - md5: bb3008613c3311a696d32fb683732c00 - size: 442 - find_best_model@sms_spam-gzip_logistic: - cmd: python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name - gzip_logistic_sms_spam --config_subdir model --params_file best_gzip_logistic_sms_spam - --default_config gzip_logistic + md5: 311ef4395865656e00f5428c8f98b19a.dir + size: 616599 + nfiles: 340 + grid_search@20-ddos-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/20/symmetry_true hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv + files.directory=ddos files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: sms_spam/logs/gzip_logistic/ - hash: md5 - md5: 89191dbe147b40192129776ef2652900.dir - size: 1649284 - nfiles: 578 - outs: - - path: conf/model/best_gzip_logistic_sms_spam.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 
fd1d0481be57844d935aea28e995a369 - size: 485 - condense@kdd_nsl-knn: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn - model=gzip_knn files.directory=kdd_nsl files.reports=reports/condense/knn/ hydra.sweeper.study_name=condense_knn_kdd_nsl - hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/knn/ - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/knn/study.csv hydra.launcher.n_jobs=-1 - --config-name condense_knn --multirun - deps: - - path: conf/model/best_gzip_knn_kdd_nsl.yaml + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: f9ad25a19931041146b4b1eab45fda68 - size: 420 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_svc.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? - subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, 
interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -15977,67 +5969,76 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: kdd_nsl/logs/condense/knn/ + - path: ddos/logs/gzip_svc/20/symmetry_true hash: md5 - md5: 81f50250e51650881283dcf68d43234c.dir - size: 10952920 - nfiles: 4097 - - path: kdd_nsl/reports/condense/knn/ + md5: 51fb64b0b4069b3a551837dd9602b50c.dir + size: 1235122 + nfiles: 513 + - path: ddos/reports/gzip_svc/20/symmetry_true/train/ hash: md5 - md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir - size: 2869636 - nfiles: 3011 - condense@kdd_nsl-svc: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc - model=gzip_svc files.directory=kdd_nsl files.reports=reports/condense/svc/ hydra.sweeper.study_name=condense_svc_kdd_nsl - hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/svc/ - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/svc/study.csv hydra.launcher.n_jobs=-1 - --config-name condense_svc --multirun + md5: 22b4b6a8d2e3861aedf0e4f43917ba72.dir + size: 551301 + nfiles: 384 + grid_search@20-ddos-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=20 + data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/20/symmetry_false + 
hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/20/study.csv files.directory=ddos + files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/model/best_gzip_svc_kdd_nsl.yaml + - path: conf/gzip_svc.yaml + hash: md5 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: 0542c20ce7b5a74a20d4ab1c38fdf213 - size: 434 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_svc.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? - subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - 
++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16050,33 +6051,39 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: kdd_nsl/logs/condense/svc/ + - path: ddos/logs/gzip_svc/20/symmetry_false hash: md5 - md5: cdf319e0c94e4c6eda84ec9b2e9ea1a9.dir - size: 10708020 - nfiles: 4097 - - path: kdd_nsl/reports/condense/svc/ + md5: 2440c70c069be012281ec7412d211422.dir + size: 1234738 + nfiles: 513 + - path: ddos/reports/gzip_svc/20/symmetry_false/train/ hash: md5 - md5: ad27897c6454024915fdcef827219bd3.dir - size: 8340639 - nfiles: 5462 - condense@kdd_nsl-logistic: + md5: 83c44eacdc2b26fd6264cfb781ea7c54.dir + size: 551571 + nfiles: 384 + grid_search@20-kdd_nsl-gzip_knn-true: cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic - model=gzip_logistic files.directory=kdd_nsl files.reports=reports/condense/logistic/ - hydra.sweeper.study_name=condense_logistic_kdd_nsl hydra.sweeper.n_trials=1024 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=kdd_nsl/logs/logistic/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20/symmetry_true + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + 
model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/model/best_gzip_logistic_kdd_nsl.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: e21d828b4b1ad122d7755e986de5b93d - size: 353 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml + hash: md5 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_knn.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? subdir: ${hydra.job.num} @@ -16091,26 +6098,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16123,33 +6130,39 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: kdd_nsl/logs/condense/logistic/ + - path: kdd_nsl/logs/gzip_knn/20/symmetry_true hash: md5 - md5: 0ce56c12dc58fe66c1fa6fec867b2cf5.dir - size: 11710344 - nfiles: 4097 - - path: 
kdd_nsl/reports/condense/logistic/ + md5: 677d1cdd68cb84a67d83107fc6925c3c.dir + size: 1196876 + nfiles: 513 + - path: kdd_nsl/reports/gzip_knn/20/symmetry_true/train/ hash: md5 - md5: ae358823518ca6759ddfa8d1c738e367.dir - size: 3101125 - nfiles: 2948 - condense@truthseeker-knn: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn - model=gzip_knn files.directory=truthseeker files.reports=reports/condense/knn/ - hydra.sweeper.study_name=condense_knn_truthseeker hydra.sweeper.n_trials=1024 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/knn/ hydra.callbacks.study_dump.output_file=truthseeker/logs/knn/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun + md5: bb50d06bc8b2fd621dd0a417273884cc.dir + size: 341291 + nfiles: 356 + grid_search@20-kdd_nsl-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/20/symmetry_false + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/20/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_knn/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/model/best_gzip_knn_truthseeker.yaml + - path: conf/gzip_knn.yaml + hash: md5 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml hash: md5 - md5: 79baf4709c4a5f2535059ef8d1b6a082 - size: 258 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + 
conf/gzip_knn.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? subdir: ${hydra.job.num} @@ -16164,26 +6177,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16196,36 +6209,43 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: truthseeker/logs/condense/knn/ + - path: kdd_nsl/logs/gzip_knn/20/symmetry_false hash: md5 - md5: 3e8b9011ee1c591904115e67db9a1a50.dir - size: 11038890 - nfiles: 4097 - - path: truthseeker/reports/condense/knn/ + md5: 8876b4cdea08cacd9fabea8b7c7e339b.dir + size: 1180969 + nfiles: 513 + - path: kdd_nsl/reports/gzip_knn/20/symmetry_false/train/ hash: md5 - md5: 1565eb2348976cc6ac9108396141080b.dir - size: 2831604 - nfiles: 3016 - condense@truthseeker-svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc - model=gzip_svc files.directory=truthseeker 
files.reports=reports/condense/svc/ - hydra.sweeper.study_name=condense_svc_truthseeker hydra.sweeper.n_trials=1024 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/svc/ hydra.callbacks.study_dump.output_file=truthseeker/logs/svc/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun + md5: 8635540eb47bb367dbac1b7d6d83afde.dir + size: 371913 + nfiles: 345 + grid_search@20-kdd_nsl-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_kdd_nsl + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20/symmetry_true + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/20/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/model/best_gzip_svc_truthseeker.yaml + - path: conf/gzip_logistic.yaml + hash: md5 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml hash: md5 - md5: 97d9d5857744b1cc077513ac5a659f62 - size: 302 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_logistic.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -16237,26 +6257,29 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16269,36 +6292,43 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: truthseeker/logs/condense/svc/ + - path: kdd_nsl/logs/gzip_logistic/20/symmetry_true hash: md5 - md5: 845724e35dc3a54bea549410a35d6afd.dir - size: 11192018 - nfiles: 4097 - - path: truthseeker/reports/condense/svc/ + md5: 4752da5c6f9e5b19ffa7b85fedaa864d.dir + size: 1271405 + nfiles: 513 + - path: kdd_nsl/reports/gzip_logistic/20/symmetry_true/train/ hash: md5 - md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir - size: 2825163 - nfiles: 3064 - condense@truthseeker-logistic: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - 
data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic - model=gzip_logistic files.directory=truthseeker files.reports=reports/condense/logistic/ - hydra.sweeper.study_name=condense_logistic_truthseeker hydra.sweeper.n_trials=1024 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=truthseeker/logs/logistic/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + md5: b2fc29717a0256771a595e81e77363c9.dir + size: 604610 + nfiles: 356 + grid_search@20-kdd_nsl-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_kdd_nsl + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/20/symmetry_false + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/20/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/20/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/model/best_gzip_logistic_truthseeker.yaml + - path: conf/gzip_logistic.yaml + hash: md5 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml hash: md5 - md5: 448e12c542f48c074057e9374743d61e - size: 326 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_logistic.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -16310,26 +6340,29 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16342,71 +6375,76 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: truthseeker/logs/condense/logistic/ + - path: kdd_nsl/logs/gzip_logistic/20/symmetry_false hash: md5 - md5: f7e754346e500d1b007b519d86f4c608.dir - size: 11847643 - nfiles: 4097 - - path: truthseeker/reports/condense/logistic/ + md5: 24f796fd29b950df2c9d7eb53db47cd2.dir + size: 1260414 + nfiles: 513 + - path: kdd_nsl/reports/gzip_logistic/20/symmetry_false/train/ hash: md5 - md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir - size: 2976098 - nfiles: 3011 - condense@sms_spam-knn: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - 
data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_knn - model=gzip_knn files.directory=sms_spam files.reports=reports/condense/knn/ - hydra.sweeper.study_name=condense_knn_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/condense/knn/ hydra.callbacks.study_dump.output_file=sms_spam/logs/knn/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_knn --multirun + md5: 6f0315fbb05852baa48643f06ed318ad.dir + size: 611076 + nfiles: 347 + grid_search@20-kdd_nsl-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20/symmetry_true + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/condense_knn.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: abd25d17a742e467d39dda34b448ba88 - size: 2181 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_svc.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16419,71 +6457,76 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: sms_spam/logs/condense/knn/ + - path: kdd_nsl/logs/gzip_svc/20/symmetry_true hash: md5 - md5: ee1eda16b8989f2a23a7dfeba27b4437.dir - size: 10519093 - nfiles: 4097 - - path: sms_spam/reports/condense/knn/ + md5: 0cbe34f36b1aacc6101ec1d3d6d878eb.dir + size: 1244608 + nfiles: 513 + - path: kdd_nsl/reports/gzip_svc/20/symmetry_true/train/ hash: md5 - md5: 84b8fcb1e78a8685141409736c6d6afa.dir - size: 4713599 - nfiles: 
4258 - condense@sms_spam-svc: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_svc - model=gzip_svc files.directory=sms_spam files.reports=reports/condense/svc/ - hydra.sweeper.study_name=condense_svc_sms_spam hydra.sweeper.n_trials=1024 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/condense/svc/ hydra.callbacks.study_dump.output_file=sms_spam/logs/svc/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_svc --multirun + md5: 0ea5d4be51518781035dd7e85b700732.dir + size: 554635 + nfiles: 384 + grid_search@20-kdd_nsl-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/20/symmetry_false + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/20/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/condense_svc.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 7a311db45e697a23a2bed8180fd45e64 - size: 2182 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_svc.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} + study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} direction: ${direction} max_failure_rate: 1.0 - params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -16496,37 +6539,39 @@ stages: max_nbytes: 100000 mmap_mode: r outs: - - path: sms_spam/logs/condense/svc/ + - path: kdd_nsl/logs/gzip_svc/20/symmetry_false hash: md5 - md5: 9d28ee3f4494d207369bd35c2f5d2164.dir - size: 11082621 - nfiles: 4097 - - path: sms_spam/reports/condense/svc/ + md5: 9eba5cbbd68553f794dec337e9606f52.dir + size: 1244184 + nfiles: 513 + - path: kdd_nsl/reports/gzip_svc/20/symmetry_false/train/ hash: md5 - md5: 200cad31398ec4545e7a490011218c47.dir - size: 4416840 - nfiles: 
3068 - condense@sms_spam-logistic: + md5: dc18ba1e036d9b6678d4b97070d84c3c.dir + size: 554884 + nfiles: 384 + grid_search@20-sms_spam-gzip_knn-true: cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=100 data.sample.test_size=100 model_name=condensed_logistic - model=gzip_logistic files.directory=sms_spam files.reports=reports/condense/logistic/ - hydra.sweeper.study_name=condense_logistic_sms_spam hydra.sweeper.n_trials=1024 - hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/condense/logistic/ hydra.callbacks.study_dump.output_file=sms_spam/logs/logistic/study.csv - hydra.launcher.n_jobs=-1 --config-name condense_logistic --multirun + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/20/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv files.directory=sms_spam + files.reports=reports/gzip_knn/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/condense_logistic.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: 85b6d1d835afd7e95b5b9f804fbd7119 - size: 2326 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/condense.yaml: + conf/gzip_knn.yaml: hydra: run: - dir: ${dataset}/logs/condense/ + dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
subdir: ${hydra.job.num} @@ -16541,752 +6586,967 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name} - storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: - ++data.sample.train_size: 1000 - ++data.sample.random_state: int(interval(10000, 20000)) - model.init.m: tag(log, interval(.01, .1)) - +model.init.sampling_method: medoid,sum,svc,random,hardness,nearmiss,knn + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 prefer: processes verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - outs: - - path: sms_spam/logs/condense/logistic/ - hash: md5 - md5: 3846050e3a2341b246c2c3366debe0dc.dir - size: 11620551 - nfiles: 4097 - - path: sms_spam/reports/condense/logistic/ - hash: md5 - md5: 05562ae582796b70d35ae7062a5030d7.dir - size: 9597627 - nfiles: 6388 - compile@sms_spam-condense/logistic: - cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/logistic --results_file - sms_spam/reports/condense/logistic.csv - deps: - - path: sms_spam/reports/condense/logistic/ - hash: md5 - md5: 05562ae582796b70d35ae7062a5030d7.dir - size: 9597627 - nfiles: 6388 - outs: - - path: sms_spam/reports/condense/logistic.csv - hash: md5 - md5: 
7094b26a582820cc1f88512573ce8c25 - size: 3430438 - compile@kdd_nsl-condense/svc: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/svc --results_file - kdd_nsl/reports/condense/svc.csv - deps: - - path: kdd_nsl/reports/condense/svc/ - hash: md5 - md5: ad27897c6454024915fdcef827219bd3.dir - size: 8340639 - nfiles: 5462 - outs: - - path: kdd_nsl/reports/condense/svc.csv - hash: md5 - md5: 643a67cb6d5974a787efa6339e3af058 - size: 3003804 - compile@kdd_nsl-condense/logistic: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/logistic --results_file - kdd_nsl/reports/condense/logistic.csv - deps: - - path: kdd_nsl/reports/condense/logistic/ - hash: md5 - md5: df73404e3f7d00371dd55b40e76fa9e0.dir - size: 3112185 - nfiles: 2954 - outs: - - path: kdd_nsl/reports/condense/logistic.csv - hash: md5 - md5: 4193461c63aca8b61956fc443f5bcd3d - size: 1649004 - compile@ddos-condense/svc: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/svc --results_file - ddos/reports/condense/svc.csv - deps: - - path: ddos/reports/condense/svc/ - hash: md5 - md5: b40b878f7eca11a9eae0c19e054bee47.dir - size: 8854939 - nfiles: 7199 - outs: - - path: ddos/reports/condense/svc.csv - hash: md5 - md5: 76b35c3e1dfa2d0476a737f9a41c25c4 - size: 3771755 - compile@truthseeker-condense/knn: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/knn --results_file - truthseeker/reports/condense/knn.csv - deps: - - path: truthseeker/reports/condense/knn/ - hash: md5 - md5: 1565eb2348976cc6ac9108396141080b.dir - size: 2831604 - nfiles: 3016 - outs: - - path: truthseeker/reports/condense/knn.csv - hash: md5 - md5: b4ec50d98f613984be6261a059120255 - size: 1595839 - compile@truthseeker-condense/svc: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/svc --results_file - truthseeker/reports/condense/svc.csv - deps: - - path: truthseeker/reports/condense/svc/ - 
hash: md5 - md5: 6cbdc47d51df656dcf7e8ae6221795b3.dir - size: 2825163 - nfiles: 3064 - outs: - - path: truthseeker/reports/condense/svc.csv - hash: md5 - md5: 4cdede4407c88bcda2afc8bbeae91ace - size: 1617655 - compile@ddos-condense/knn: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/knn --results_file - ddos/reports/condense/knn.csv - deps: - - path: ddos/reports/condense/knn/ - hash: md5 - md5: 9b6918814be3bea732abc71b8684fd8d.dir - size: 8458502 - nfiles: 9157 - outs: - - path: ddos/reports/condense/knn.csv - hash: md5 - md5: 0cd0ff58f94fb06093779ff81d37d2bf - size: 4723182 - compile@sms_spam-condense/svc: - cmd: python -m deckard.layers.compile --report_folder sms_spam/reports/condense/svc --results_file - sms_spam/reports/condense/svc.csv - deps: - - path: sms_spam/reports/condense/svc/ - hash: md5 - md5: 200cad31398ec4545e7a490011218c47.dir - size: 4416840 - nfiles: 3068 - outs: - - path: sms_spam/reports/condense/svc.csv - hash: md5 - md5: 32f06cbea623f845dcfa7400d707abad - size: 1573621 - compile@kdd_nsl-condense/knn: - cmd: python -m deckard.layers.compile --report_folder kdd_nsl/reports/condense/knn --results_file - kdd_nsl/reports/condense/knn.csv - deps: - - path: kdd_nsl/reports/condense/knn/ - hash: md5 - md5: 3f8eb680f1f8960490e4581bfa16cfd2.dir - size: 2869636 - nfiles: 3011 - outs: - - path: kdd_nsl/reports/condense/knn.csv - hash: md5 - md5: 29211ec6d9b2b1a5e9193eaabfff3488 - size: 1608857 - compile@truthseeker-condense/logistic: - cmd: python -m deckard.layers.compile --report_folder truthseeker/reports/condense/logistic --results_file - truthseeker/reports/condense/logistic.csv - deps: - - path: truthseeker/reports/condense/logistic/ - hash: md5 - md5: 8bd6876fc856ea5bd1e95b54093aedb8.dir - size: 2976098 - nfiles: 3011 - outs: - - path: truthseeker/reports/condense/logistic.csv - hash: md5 - md5: 5c01852f352ac96150fb36c2df9bcbbf - size: 1648856 - compile@sms_spam-condense/knn: - cmd: python -m deckard.layers.compile 
--report_folder sms_spam/reports/condense/knn --results_file - sms_spam/reports/condense/knn.csv - deps: - - path: sms_spam/reports/condense/knn/ - hash: md5 - md5: 84b8fcb1e78a8685141409736c6d6afa.dir - size: 4713599 - nfiles: 4258 - outs: - - path: sms_spam/reports/condense/knn.csv - hash: md5 - md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc - size: 2287605 - compile@ddos-condense/logistic: - cmd: python -m deckard.layers.compile --report_folder ddos/reports/condense/logistic --results_file - ddos/reports/condense/logistic.csv - deps: - - path: ddos/reports/condense/logistic/ - hash: md5 - md5: 7ce841278929a90690417685b7c7f143.dir - size: 5929815 - nfiles: 5888 - outs: - - path: ddos/reports/condense/logistic.csv - hash: md5 - md5: b24764aed957fdf6d2ccb541ef490d37 - size: 3150984 - clean@sms_spam-condense/svc: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/svc.csv - -o sms_spam/plots/clean/condense/svc.csv -c conf/clean.yaml - deps: - - path: sms_spam/reports/condense/svc.csv - hash: md5 - md5: 32f06cbea623f845dcfa7400d707abad - size: 1573621 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: sms_spam/plots/clean/condense/svc.csv - hash: md5 - md5: 92b8648f6759e0a56c65aeec4a15aa92 - size: 1223675 - clean@ddos-condense/knn: - cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/knn.csv -o - ddos/plots/clean/condense/knn.csv -c conf/clean.yaml - deps: - - path: ddos/reports/condense/knn.csv - hash: md5 - md5: 0cd0ff58f94fb06093779ff81d37d2bf - size: 4723182 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - 
ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: ddos/plots/clean/condense/knn.csv - hash: md5 - md5: d214914ecfbba6afbd4ff9a61cb96bb1 - size: 3652514 - clean@truthseeker-condense/svc: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/svc.csv - -o truthseeker/plots/clean/condense/svc.csv -c conf/clean.yaml - deps: - - path: truthseeker/reports/condense/svc.csv - hash: md5 - md5: 4cdede4407c88bcda2afc8bbeae91ace - size: 1617655 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: truthseeker/plots/clean/condense/svc.csv - hash: md5 - md5: a17c0cdb6a3fbfae5bd4fcfca1938a96 - size: 1257671 - clean@kdd_nsl-condense/knn: - cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/knn.csv - -o kdd_nsl/plots/clean/condense/knn.csv -c conf/clean.yaml - deps: - - path: kdd_nsl/reports/condense/knn.csv - hash: md5 - md5: 29211ec6d9b2b1a5e9193eaabfff3488 - size: 1608857 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: 
/tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: kdd_nsl/plots/clean/condense/knn.csv - hash: md5 - md5: 23789b08b0fd1616555611d0e7971db9 - size: 1204868 - clean@kdd_nsl-condense/svc: - cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/svc.csv - -o kdd_nsl/plots/clean/condense/svc.csv -c conf/clean.yaml - deps: - - path: kdd_nsl/reports/condense/svc.csv + - path: sms_spam/logs/gzip_knn/20/symmetry_true hash: md5 - md5: 643a67cb6d5974a787efa6339e3af058 - size: 3003804 - params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric - outs: - - path: kdd_nsl/plots/clean/condense/svc.csv + md5: b900fa95011e3c9620f9a7103baa47a1.dir + size: 1193555 + nfiles: 513 + - path: sms_spam/reports/gzip_knn/20/symmetry_true/train/ hash: md5 - md5: c9b2ff8546f531fa439c664c63fc06fd - size: 2021393 - clean@kdd_nsl-condense/logistic: - cmd: python -m deckard.layers.clean_data -i kdd_nsl/reports/condense/logistic.csv - -o kdd_nsl/plots/clean/condense/logistic.csv -c conf/clean.yaml + md5: 0c2256ed804059b75873b27f8963204e.dir + size: 329514 + nfiles: 356 + grid_search@20-sms_spam-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/20/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/20/study.csv files.directory=sms_spam + files.reports=reports/gzip_knn/20/symmetry_false 
hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: kdd_nsl/reports/condense/logistic.csv + - path: conf/gzip_knn.yaml hash: md5 - md5: 4193461c63aca8b61956fc443f5bcd3d - size: 1649004 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml + hash: md5 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: kdd_nsl/plots/clean/condense/logistic.csv + - path: sms_spam/logs/gzip_knn/20/symmetry_false hash: md5 - md5: 55a0ac50149a3e3d93b69c63ccd0d7a3 - size: 1174964 - clean@sms_spam-condense/knn: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/knn.csv - -o sms_spam/plots/clean/condense/knn.csv -c conf/clean.yaml + md5: 0554269057beb85cd3746813652ba9d5.dir + size: 1191491 + nfiles: 513 + - path: sms_spam/reports/gzip_knn/20/symmetry_false/train/ + hash: md5 + md5: e25f72d029f72432d5c9a5ffacec0208.dir + size: 341814 + nfiles: 356 + grid_search@20-sms_spam-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + 
model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/20/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: sms_spam/reports/condense/knn.csv + - path: conf/gzip_logistic.yaml + hash: md5 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml hash: md5 - md5: c8d4f7036e0c3e1cf8fa5a0b922c6ecc - size: 2287605 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: sms_spam/plots/clean/condense/knn.csv + - path: sms_spam/logs/gzip_logistic/20/symmetry_true hash: md5 - md5: 7dda620e8ae59aab14ac83c0071a8b96 - size: 1268504 - clean@sms_spam-condense/logistic: - cmd: python -m deckard.layers.clean_data -i sms_spam/reports/condense/logistic.csv - -o sms_spam/plots/clean/condense/logistic.csv -c conf/clean.yaml + md5: b95404e2e4b0a957a788e82f65a49a10.dir + size: 1268014 + nfiles: 513 + - path: sms_spam/reports/gzip_logistic/20/symmetry_true/train/ + hash: md5 + md5: b2333589409b837e4233aa2fb7cded97.dir + size: 592315 + nfiles: 356 + grid_search@20-sms_spam-gzip_logistic-false: + cmd: python -m 
deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/20/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/20/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/20/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: sms_spam/reports/condense/logistic.csv + - path: conf/gzip_logistic.yaml hash: md5 - md5: 7094b26a582820cc1f88512573ce8c25 - size: 3430438 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml + hash: md5 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: sms_spam/plots/clean/condense/logistic.csv + - path: sms_spam/logs/gzip_logistic/20/symmetry_false hash: md5 - md5: 1f89cfa87c87f195079e49eb5d6e7ce5 - size: 2461824 - clean@truthseeker-condense/logistic: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/logistic.csv - -o truthseeker/plots/clean/condense/logistic.csv -c conf/clean.yaml + md5: 9d4569ebac94dccb57a6d50c04fd2b1c.dir + size: 1252292 + nfiles: 513 + - path: sms_spam/reports/gzip_logistic/20/symmetry_false/train/ + hash: md5 + md5: a4a3af08dfca0a0ba5b94bb0a9ea735a.dir + size: 603823 + nfiles: 343 + grid_search@20-sms_spam-gzip_svc-true: + cmd: python -m 
deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/20/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv files.directory=sms_spam + files.reports=reports/gzip_svc/20/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: truthseeker/reports/condense/logistic.csv + - path: conf/gzip_svc.yaml + hash: md5 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: 5c01852f352ac96150fb36c2df9bcbbf - size: 1648856 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: truthseeker/plots/clean/condense/logistic.csv + - path: sms_spam/logs/gzip_svc/20/symmetry_true hash: md5 - md5: 9710addb440069a5ea884d90ed4c394a - size: 1237939 - clean@truthseeker-condense/knn: - cmd: python -m deckard.layers.clean_data -i truthseeker/reports/condense/knn.csv - -o truthseeker/plots/clean/condense/knn.csv -c conf/clean.yaml + md5: 97f387456af594e96fe70ae39cfe8018.dir + size: 1241267 + nfiles: 513 + - path: sms_spam/reports/gzip_svc/20/symmetry_true/train/ + hash: md5 + md5: aa3a7443b115c46ce08aa7a70a7fb77c.dir + size: 542327 + nfiles: 384 + grid_search@20-sms_spam-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=20 
data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/20/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/20/study.csv files.directory=sms_spam + files.reports=reports/gzip_svc/20/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: truthseeker/reports/condense/knn.csv + - path: conf/gzip_svc.yaml + hash: md5 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: b4ec50d98f613984be6261a059120255 - size: 1595839 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: truthseeker/plots/clean/condense/knn.csv + - path: sms_spam/logs/gzip_svc/20/symmetry_false hash: md5 - md5: a0c8deb8fe7617477ec43fae2a851b4d - size: 1191230 - clean@ddos-condense/svc: - cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/svc.csv -o - ddos/plots/clean/condense/svc.csv -c conf/clean.yaml + md5: dccf212ddba8d745daa30ce1c9efd0b1.dir + size: 1240872 + nfiles: 513 + - path: sms_spam/reports/gzip_svc/20/symmetry_false/train/ + hash: md5 + md5: 923ea8186f9d9630e26fa0da18e03508.dir + size: 542578 + nfiles: 384 + grid_search@20-truthseeker-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 
model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/20/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/20/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: ddos/reports/condense/svc.csv + - path: conf/gzip_knn.yaml + hash: md5 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml hash: md5 - md5: 76b35c3e1dfa2d0476a737f9a41c25c4 - size: 3771755 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: ddos/plots/clean/condense/svc.csv + - path: truthseeker/logs/gzip_knn/20/symmetry_true hash: md5 - md5: 102b712883464d547a4d2119f6c5df60 - size: 2968961 - clean@ddos-condense/logistic: - cmd: python -m deckard.layers.clean_data -i ddos/reports/condense/logistic.csv - -o ddos/plots/clean/condense/logistic.csv -c conf/clean.yaml + md5: a98ed7354eb47190c6301eb889704388.dir + size: 1206224 + nfiles: 513 + - path: truthseeker/reports/gzip_knn/20/symmetry_true/train/ + hash: md5 + md5: ad20e69c6454627f1483726b0cc91365.dir + size: 331035 + nfiles: 359 + grid_search@20-truthseeker-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null 
+ model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/20/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/20/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: ddos/reports/condense/logistic.csv + - path: conf/gzip_knn.yaml + hash: md5 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml hash: md5 - md5: b24764aed957fdf6d2ccb541ef490d37 - size: 3150984 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/clean.yaml: - replace: - model.init.metric: - jaro: Jaro - _winkler: -Winkler - levenshtein: Levenshtein - ncd: NCD - ratio: Ratio - seqRatio: SeqRatio - hamming: Hamming - gzip: Gzip - pkl: Pickle - bz2: BZ2 - zstd: Zstd - lzma: Lzma - model_name: - GzipSVC: k-SVC - GzipLogisticRegressor: k-Logistic - GzipKNN: k-KNN - model.init.symmetric: - true: Symmetric - false: Asymmetric + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: ddos/plots/clean/condense/logistic.csv + - path: truthseeker/logs/gzip_knn/20/symmetry_false hash: md5 - md5: bfca6e865bca11a25fa1e42dfbdea0ad - size: 2331762 - merge_condense@ddos: - cmd: python merge.py --big_dir ddos/plots/ --data_file clean/condense/knn.csv - --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder - ddos/plots/ --output_file condensed_merged.csv - deps: - - path: ddos/plots/clean/condense/knn.csv + md5: 2617ca5cb1d8ff3905d50915269c6e9f.dir + size: 1203425 + nfiles: 513 + - path: truthseeker/reports/gzip_knn/20/symmetry_false/train/ hash: md5 - md5: d214914ecfbba6afbd4ff9a61cb96bb1 - size: 3652514 - - path: ddos/plots/clean/condense/logistic.csv + md5: 4a06f23a3f742c65df6594ee04759bf8.dir + size: 342243 + nfiles: 358 + 
grid_search@20-truthseeker-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/20/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml hash: md5 - md5: bfca6e865bca11a25fa1e42dfbdea0ad - size: 2331762 - - path: ddos/plots/clean/condense/svc.csv + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml hash: md5 - md5: 102b712883464d547a4d2119f6c5df60 - size: 2968961 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: ddos/plots/condensed_merged.csv + - path: truthseeker/logs/gzip_logistic/20/symmetry_true hash: md5 - md5: dc147a2e9c585b39c5e212a46ade70ac - size: 9306964 - merge_condense@kdd_nsl: - cmd: python merge.py --big_dir kdd_nsl/plots/ --data_file clean/condense/knn.csv - --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder - kdd_nsl/plots/ --output_file condensed_merged.csv - deps: - - path: kdd_nsl/plots/clean/condense/knn.csv + md5: ff829c546214f8c48b65d65886826fa3.dir + size: 1277433 + nfiles: 513 + - path: truthseeker/reports/gzip_logistic/20/symmetry_true/train/ hash: md5 - md5: 23789b08b0fd1616555611d0e7971db9 
- size: 1204868 - - path: kdd_nsl/plots/clean/condense/logistic.csv + md5: 9fa0a99c495e46db650c6a7a5b520119.dir + size: 596142 + nfiles: 356 + grid_search@20-truthseeker-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/20/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/20/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun + deps: + - path: conf/gzip_logistic.yaml hash: md5 - md5: 55a0ac50149a3e3d93b69c63ccd0d7a3 - size: 1174964 - - path: kdd_nsl/plots/clean/condense/svc.csv + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 + - path: params.yaml hash: md5 - md5: c9b2ff8546f531fa439c664c63fc06fd - size: 2021393 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_logistic.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: kdd_nsl/plots/condensed_merged.csv + - path: truthseeker/logs/gzip_logistic/20/symmetry_false hash: md5 - md5: 1ddcee7de7db0c1a7d4898de4a03d7b7 - size: 4543759 - merge_condense@sms_spam: - cmd: python merge.py --big_dir sms_spam/plots/ --data_file clean/condense/knn.csv - --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder - sms_spam/plots/ --output_file condensed_merged.csv - deps: - - path: sms_spam/plots/clean/condense/knn.csv + md5: 3236c08228d49f414fb9276f63fd854e.dir + size: 1265237 + nfiles: 513 + - path: truthseeker/reports/gzip_logistic/20/symmetry_false/train/ hash: md5 - md5: 
7dda620e8ae59aab14ac83c0071a8b96 - size: 1268504 - - path: sms_spam/plots/clean/condense/logistic.csv + md5: 61c25a8988641a6780633c71c79af7b1.dir + size: 603920 + nfiles: 346 + grid_search@20-truthseeker-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/20/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/20/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml hash: md5 - md5: 1f89cfa87c87f195079e49eb5d6e7ce5 - size: 2461824 - - path: sms_spam/plots/clean/condense/svc.csv + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: 92b8648f6759e0a56c65aeec4a15aa92 - size: 1223675 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: sms_spam/plots/condensed_merged.csv + - path: truthseeker/logs/gzip_svc/20/symmetry_true hash: md5 - md5: 8f549743001ca622a6c7c8cbb2b3d17d - size: 5114716 - merge_condense@truthseeker: - cmd: python merge.py --big_dir truthseeker/plots/ --data_file clean/condense/knn.csv - --little_dir_data_file clean/condense/logistic.csv clean/condense/svc.csv --output_folder - truthseeker/plots/ --output_file condensed_merged.csv - deps: - - path: truthseeker/plots/clean/condense/knn.csv + md5: 80d0c1ade291bb4dbc9af47eddab6d27.dir + size: 1250879 + nfiles: 513 + - path: truthseeker/reports/gzip_svc/20/symmetry_true/train/ hash: md5 - md5: a0c8deb8fe7617477ec43fae2a851b4d - size: 1191230 - - path: 
truthseeker/plots/clean/condense/logistic.csv + md5: 913d1664491e029cb3e45e5fa1d9c2b1.dir + size: 546189 + nfiles: 384 + grid_search@20-truthseeker-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=20 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/20/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/20/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/20/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun + deps: + - path: conf/gzip_svc.yaml hash: md5 - md5: 9710addb440069a5ea884d90ed4c394a - size: 1237939 - - path: truthseeker/plots/clean/condense/svc.csv + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 + - path: params.yaml hash: md5 - md5: a17c0cdb6a3fbfae5bd4fcfca1938a96 - size: 1257671 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_svc.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.id} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: + - maximize + metric_names: + - accuracy + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + study_name: ${dataset}_${model_name}_${stage} + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 + params: + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null + model_name: ${model_name} + direction: ${direction} + max_failure_rate: 1.0 + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r outs: - - path: truthseeker/plots/condensed_merged.csv + - path: truthseeker/logs/gzip_svc/20/symmetry_false hash: md5 - md5: 738dc93bfff1b9c167949e722ee79665 - size: 3805499 - grid_search@300-ddos-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + md5: cd321e0e8ed96e2dc914d3f061139e1b.dir + size: 1250531 + nfiles: 513 + - path: truthseeker/reports/gzip_svc/20/symmetry_false/train/ + hash: md5 + md5: 7fd5bb25a3688c3470e30aeee85674ff.dir + size: 546474 + nfiles: 384 + grid_search@100-ddos-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn 
model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv - files.directory=ddos files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/100/symmetry_true + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv files.directory=ddos + files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -17306,30 +7566,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: 
gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -17341,367 +7597,115 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn - outs: - - path: ddos/logs/gzip_knn/300 - hash: md5 - md5: 1e533c118406ca2ffae2b0a3e11a5035.dir - size: 1671182 - nfiles: 514 - - path: ddos/reports/gzip_knn/300/train/ - hash: md5 - md5: 000376454dd461f25065cdb093e78e7c.dir - size: 1461265 - nfiles: 1403 - plot_condense@sms_spam: - cmd: python -m deckard.layers.plots --path sms_spam/plots/ --file sms_spam/plots/condensed_merged.csv -c - conf/condensed_plots.yaml - deps: - - path: sms_spam/plots/condensed_merged.csv - hash: md5 - md5: 8f549743001ca622a6c7c8cbb2b3d17d - size: 5114716 - params: - conf/condensed_plots.yaml: - line_plot: - - file: sampling_method_vs_accuracy.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: accuracy - ylabel: Accuracy - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - y_scale: linear - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_train_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_predict_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples 
per Class - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - path: sms_spam/plots/sampling_method_vs_accuracy.pdf - hash: md5 - md5: 8d3c7b03379f2f16bdb6de450083608b - size: 40643 - - path: sms_spam/plots/sampling_method_vs_predict_time.pdf - hash: md5 - md5: 095622e64533aedee66d72079f141c0d - size: 53902 - - path: sms_spam/plots/sampling_method_vs_train_time.pdf - hash: md5 - md5: da26bd3fc967c9925975f6c8ad189a88 - size: 50367 - plot_condense@ddos: - cmd: python -m deckard.layers.plots --path ddos/plots/ --file ddos/plots/condensed_merged.csv -c - conf/condensed_plots.yaml - deps: - - path: ddos/plots/condensed_merged.csv - hash: md5 - md5: dc147a2e9c585b39c5e212a46ade70ac - size: 9306964 - params: - conf/condensed_plots.yaml: - line_plot: - - file: sampling_method_vs_accuracy.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: accuracy - ylabel: Accuracy - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - y_scale: linear - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_train_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_predict_time.pdf - hue: model.init.sampling_method - 
title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 outs: - - path: ddos/plots/sampling_method_vs_accuracy.pdf + - path: ddos/logs/gzip_knn/100/symmetry_true hash: md5 - md5: 09737e6b272979bf7fc879ece10d25e5 - size: 57907 - - path: ddos/plots/sampling_method_vs_predict_time.pdf - hash: md5 - md5: 78e2e0111219f86d189dfb952d81cdba - size: 78230 - - path: ddos/plots/sampling_method_vs_train_time.pdf + md5: ce684eab73c010891cc6eb844e066134.dir + size: 1190708 + nfiles: 513 + - path: ddos/reports/gzip_knn/100/symmetry_true/train/ hash: md5 - md5: ab34ce0b71b6c0153525b0194178ecaf - size: 64512 - plot_condense@kdd_nsl: - cmd: python -m deckard.layers.plots --path kdd_nsl/plots/ --file kdd_nsl/plots/condensed_merged.csv -c - conf/condensed_plots.yaml + md5: 60e9b4f5171f22fb8144383380218108.dir + size: 81468 + nfiles: 91 + grid_search@100-ddos-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/100/symmetry_false + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/100/study.csv files.directory=ddos + files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: kdd_nsl/plots/condensed_merged.csv + - path: conf/gzip_knn.yaml 
hash: md5 - md5: 1ddcee7de7db0c1a7d4898de4a03d7b7 - size: 4543759 - params: - conf/condensed_plots.yaml: - line_plot: - - file: sampling_method_vs_accuracy.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: accuracy - ylabel: Accuracy - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - y_scale: linear - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_train_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_predict_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - outs: - - path: kdd_nsl/plots/sampling_method_vs_accuracy.pdf + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml hash: md5 - md5: 1c673220cd32e3f9bd2aa92516d0b20e - size: 38546 - - path: kdd_nsl/plots/sampling_method_vs_predict_time.pdf + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? 
+ subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: ddos/logs/gzip_knn/100/symmetry_false hash: md5 - md5: 4bcb086fcd47e05d2b79e30a12d15869 - size: 50187 - - path: kdd_nsl/plots/sampling_method_vs_train_time.pdf + md5: 307edd5cacb6d130cdca319d74e42152.dir + size: 1200449 + nfiles: 513 + - path: ddos/reports/gzip_knn/100/symmetry_false/train/ hash: md5 - md5: 2b3e91d9b656ba35d06f8e97d1e8359d - size: 45992 - grid_search@300-ddos-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + md5: 9eb4c5ed862761d977cbec997e27a109.dir + size: 286576 + nfiles: 321 + grid_search@100-ddos-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic 
model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_logistic/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/300/study.csv - files.directory=ddos files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 + model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_logistic/100/symmetry_true + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/100/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_logistic --multirun deps: - path: conf/gzip_logistic.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_logistic.yaml: hydra: @@ -17721,31 +7725,27 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 
1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.fit_intercept: True,False +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -17759,36 +7759,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: ddos/logs/gzip_logistic/300 + - path: ddos/logs/gzip_logistic/100/symmetry_true hash: md5 - md5: ace39d7825de3ce5c0d678839c812ab6.dir - size: 1765030 - nfiles: 514 - - path: ddos/reports/gzip_logistic/300/train/ + md5: d0b4bd67c2297fcf7cd87b5bb49830ce.dir + size: 1236038 + nfiles: 513 + - path: ddos/reports/gzip_logistic/100/symmetry_true/train/ hash: md5 - md5: 9f23532033970310bd5915d4018de935.dir - size: 1436932 - nfiles: 963 - grid_search@300-ddos-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 - data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/300/study.csv - files.directory=ddos files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 3f4bc5d4c66937cccc23ae865cd69762.dir + size: 636279 + nfiles: 332 + grid_search@100-ddos-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 
hydra.sweep.dir=ddos/logs/gzip_logistic/100/symmetry_false + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/100/study.csv + files.directory=ddos files.reports=reports/gzip_logistic/100/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_svc.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -17800,37 +7802,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy + directions: ${direction} + metric_names: ${optimizers} output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: 
gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -17844,147 +7842,37 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc - outs: - - path: ddos/logs/gzip_svc/300 - hash: md5 - md5: 7681421b662e0a0690e9a1a6a4cf4b79.dir - size: 1710386 - nfiles: 514 - - path: ddos/reports/gzip_svc/300/train/ - hash: md5 - md5: c872a806e708289c65e6856bc2a057bf.dir - size: 1393355 - nfiles: 1045 - plot_condense@truthseeker: - cmd: python -m deckard.layers.plots --path truthseeker/plots/ --file truthseeker/plots/condensed_merged.csv -c - conf/condensed_plots.yaml - deps: - - path: truthseeker/plots/condensed_merged.csv - hash: md5 - md5: 738dc93bfff1b9c167949e722ee79665 - size: 3805499 - params: - conf/condensed_plots.yaml: - line_plot: - - file: sampling_method_vs_accuracy.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: accuracy - ylabel: Accuracy - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - y_scale: linear - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_train_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: train_time - ylabel: Training Time (s) - y_scale: linear - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - 
bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 - - file: sampling_method_vs_predict_time.pdf - hue: model.init.sampling_method - title: - x: model.init.m - xlabel: Percentage of Samples per Class - y: predict_time - ylabel: Prediction Time (s) - y_scale: log - hue_order: - - random - - svc - - knn - - sum - - medoid - - nearmiss - - hardness - errorbar: se - err_style: bars - xlim: - - 0 - - 1 - legend: - title: Sampling Method - bbox_to_anchor: - - 1.05 - - 0.5 - loc: center left - prop: - size: 14 outs: - - path: truthseeker/plots/sampling_method_vs_accuracy.pdf - hash: md5 - md5: 0d293f64173585cb19c88218a7327f83 - size: 18158 - - path: truthseeker/plots/sampling_method_vs_predict_time.pdf + - path: ddos/logs/gzip_logistic/100/symmetry_false hash: md5 - md5: bb494d7b950451096bb639f3a9f1b4cb - size: 45092 - - path: truthseeker/plots/sampling_method_vs_train_time.pdf + md5: 54987f50efd1f9833711c4bce8ad266b.dir + size: 1204334 + nfiles: 513 + - path: ddos/reports/gzip_logistic/100/symmetry_false/train/ hash: md5 - md5: 85a9eeb8f5aecc63f5634b12483941cf - size: 39796 - grid_search@500-ddos-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 - data.sample.test_size=100 model_name=gzip_logistic model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_logistic_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_logistic/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_logistic/500/study.csv - files.directory=ddos files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 4237b3f9a08decdbf109a54fce741a4e.dir + size: 659696 + nfiles: 306 + grid_search@100-ddos-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 + data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + 
model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/100/symmetry_true + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv files.directory=ddos + files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_svc.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -17996,37 +7884,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: 
True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18040,34 +7924,35 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: ddos/logs/gzip_logistic/500 - hash: md5 - md5: afb6463625f139e82a88976c24b93f16.dir - size: 1791134 - nfiles: 514 - - path: ddos/reports/gzip_logistic/500/train/ - hash: md5 - md5: dbed10dfbc2747c79e14dcedcbce0661.dir - size: 968208 - nfiles: 702 - grid_search@500-ddos-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 + - path: ddos/logs/gzip_svc/100/symmetry_true + hash: md5 + md5: 8f54e554e59aa39da2cc6a545a2b2a84.dir + size: 1238692 + nfiles: 513 + - path: ddos/reports/gzip_svc/100/symmetry_true/train/ + hash: md5 + md5: 1d55a1ad04addb2611ea268d0d5c037c.dir + size: 552051 + nfiles: 384 + grid_search@100-ddos-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/500/study.csv - files.directory=ddos files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_svc/100/symmetry_false + 
hydra.callbacks.study_dump.output_file=ddos/logs/gzip_svc/100/study.csv files.directory=ddos + files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_svc --multirun deps: - path: conf/gzip_svc.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_svc.yaml: hydra: @@ -18089,29 +7974,25 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.gamma: scale,auto +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18125,43 +8006,122 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: ddos/logs/gzip_svc/500 + - path: ddos/logs/gzip_svc/100/symmetry_false hash: md5 - md5: 319357234ff9123f09bb6603fe74866f.dir - size: 
1737584 - nfiles: 514 - - path: ddos/reports/gzip_svc/500/train/ + md5: 20385e7fa159098729a46a9ec8ad3e2f.dir + size: 1240441 + nfiles: 513 + - path: ddos/reports/gzip_svc/100/symmetry_false/train/ hash: md5 - md5: 63ecb36bf4e16027b60bcd2892330829.dir - size: 897567 - nfiles: 768 - grid_search@100-sms_spam-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100 - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv - files.directory=sms_spam files.reports=reports/gzip_logistic/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 006736d48dc878223021e6c5cc721e21.dir + size: 552730 + nfiles: 384 + grid_search@100-kdd_nsl-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100/symmetry_true + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 
8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_knn.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ sweep: dir: ??? - subdir: ${hydra.job.id} + subdir: ${hydra.job.num} + callbacks: + study_dump: + _target_: database.OptunaStudyDumpCallback + storage: ${hydra.sweeper.storage} + study_name: ${hydra.sweeper.study_name} + directions: ${direction} + metric_names: ${optimizers} + output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv + sweeper: + sampler: + _target_: optuna.samplers.TPESampler + consider_prior: true + seed: 123 + prior_weight: 1.0 + consider_magic_clip: true + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 + multivariate: true + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + direction: ${direction} + storage: sqlite:///optuna.db + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 + params: + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute + model_name: ${model_name} + launcher: + _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher + n_jobs: 8 + prefer: processes + verbose: 1 + timeout: + pre_dispatch: ${hydra.sweeper.n_jobs} + batch_size: auto + temp_folder: /tmp/deckard + max_nbytes: 100000 + mmap_mode: r + outs: + - path: kdd_nsl/logs/gzip_knn/100/symmetry_true + hash: md5 + md5: 549fe2e753e0bcf601fd788dec7aeb1e.dir + size: 1188776 + nfiles: 513 + - path: kdd_nsl/reports/gzip_knn/100/symmetry_true/train/ + hash: md5 + md5: c98bd9dce2feec89f7aec764a2c6d1e7.dir + size: 179210 + nfiles: 190 + grid_search@100-kdd_nsl-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false 
hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_knn/100/symmetry_false + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/100/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun + deps: + - path: conf/gzip_knn.yaml + hash: md5 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 + - path: params.yaml + hash: md5 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 + params: + conf/gzip_knn.yaml: + hydra: + run: + dir: ${dataset}/logs/${stage}/ + sweep: + dir: ??? + subdir: ${hydra.job.num} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -18173,33 +8133,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} + direction: ${direction} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model.init.k: 1,3,5,7,11 + 
+model.init.weights: uniform,distance + +model.init.algorithm: brute model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -18211,36 +8164,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: sms_spam/logs/gzip_logistic/100 + - path: kdd_nsl/logs/gzip_knn/100/symmetry_false hash: md5 - md5: d1120618c5a674fe50c5717e2d71d640.dir - size: 1554813 - nfiles: 514 - - path: sms_spam/reports/gzip_logistic/100/train/ + md5: 0a1d8131642b28351971a5294828d0d7.dir + size: 1127001 + nfiles: 513 + - path: kdd_nsl/reports/gzip_knn/100/symmetry_false/train/ hash: md5 - md5: 89f61791ac36513c4957057485a2e8e3.dir - size: 553318 - nfiles: 357 - grid_search@100-sms_spam-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_svc/100 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv - files.directory=sms_spam files.reports=reports/gzip_svc/100 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: abf88a5a4a306ec284320cf3aa409135.dir + size: 155023 + nfiles: 138 + grid_search@100-kdd_nsl-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_kdd_nsl + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100/symmetry_true + 
hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/100/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_svc.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -18252,37 +8207,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy + directions: ${direction} + metric_names: ${optimizers} output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + 
+model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18296,42 +8247,44 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: sms_spam/logs/gzip_svc/100 + - path: kdd_nsl/logs/gzip_logistic/100/symmetry_true hash: md5 - md5: cb8e4936d6ee03af99fa775d8b4b956b.dir - size: 1483653 - nfiles: 514 - - path: sms_spam/reports/gzip_svc/100/train/ + md5: e57d0862551308c0ec0cabd6542a55e5.dir + size: 1239394 + nfiles: 513 + - path: kdd_nsl/reports/gzip_logistic/100/symmetry_true/train/ hash: md5 - md5: ae31535b48c489e3040a2836c43215a5.dir - size: 543085 - nfiles: 384 - grid_search@300-kdd_nsl-gzip_knn: + md5: af7ccccb3c94a39edbbd239e9cc2a6ae.dir + size: 646824 + nfiles: 327 + grid_search@100-kdd_nsl-gzip_logistic-false: cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/300/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_kdd_nsl + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/100/symmetry_false + 
hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/100/study.csv + files.directory=kdd_nsl files.reports=reports/gzip_logistic/100/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_knn.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_knn.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ sweep: dir: ??? - subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -18343,30 +8296,29 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 
1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -18378,37 +8330,37 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: kdd_nsl/logs/gzip_knn/300 + - path: kdd_nsl/logs/gzip_logistic/100/symmetry_false hash: md5 - md5: d3f58cbd5181a4f86ac660aba7173dfb.dir - size: 1437824 - nfiles: 514 - - path: kdd_nsl/reports/gzip_knn/300/train/ + md5: 3ee2c47866f4ce98afa41e1d10dc99c8.dir + size: 1285300 + nfiles: 513 + - path: kdd_nsl/reports/gzip_logistic/100/symmetry_false/train/ hash: md5 - md5: d5317915e16e54a5fb4c82963cc0b058.dir - size: 825336 - nfiles: 612 - grid_search@300-kdd_nsl-gzip_logistic: + md5: c7034228ec933542633506b363bdd18a.dir + size: 586323 + nfiles: 367 + grid_search@100-kdd_nsl-gzip_svc-true: cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/300 - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/300/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100/symmetry_true + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv files.directory=kdd_nsl + 
files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_svc.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -18420,37 +8372,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.kernel: rbf,precomputed + +model.init.C: 
tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18464,34 +8412,35 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: kdd_nsl/logs/gzip_logistic/300 + - path: kdd_nsl/logs/gzip_svc/100/symmetry_true hash: md5 - md5: 6793362a9053b6f28647bb49875ebcf3.dir - size: 1634660 - nfiles: 514 - - path: kdd_nsl/reports/gzip_logistic/300/train/ + md5: 66d83844ef05adb0a121fce7b252b683.dir + size: 1250230 + nfiles: 513 + - path: kdd_nsl/reports/gzip_svc/100/symmetry_true/train/ hash: md5 - md5: f2a46e55c8597a4d4082202f69186083.dir - size: 945424 - nfiles: 723 - grid_search@300-kdd_nsl-gzip_svc: + md5: 9de34dd6d2fb5ad4ebb92c7dfcf05629.dir + size: 555703 + nfiles: 384 + grid_search@100-kdd_nsl-gzip_svc-false: cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/300/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_svc/100/symmetry_false + hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/100/study.csv files.directory=kdd_nsl + files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + 
model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_svc --multirun deps: - path: conf/gzip_svc.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_svc.yaml: hydra: @@ -18513,29 +8462,25 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.gamma: scale,auto +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18549,34 +8494,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: kdd_nsl/logs/gzip_svc/300 + - path: kdd_nsl/logs/gzip_svc/100/symmetry_false hash: md5 - md5: 1bd3b191acf0f78e361e1bc3cb6df928.dir - size: 1584389 - nfiles: 514 - - path: kdd_nsl/reports/gzip_svc/300/train/ + md5: 977a69c4aa921c8559e687b1ca7fb3b6.dir + size: 1244242 + nfiles: 513 + - path: kdd_nsl/reports/gzip_svc/100/symmetry_false/train/ hash: md5 - 
md5: b6e64c8b751bf3a140aa9871f341a173.dir - size: 899234 - nfiles: 765 - grid_search@300-sms_spam-gzip_knn: + md5: 4dafa970272be8aa5c954ef2c8883ce1.dir + size: 555022 + nfiles: 384 + grid_search@100-sms_spam-gzip_knn-true: cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/300/study.csv - files.directory=sms_spam files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/100/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/100/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -18596,30 +8543,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + 
n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -18631,43 +8574,44 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: sms_spam/logs/gzip_knn/300 + - path: sms_spam/logs/gzip_knn/100/symmetry_true hash: md5 - md5: 09019492218a189aabe0601cb4c3f3a3.dir - size: 1460894 - nfiles: 514 - - path: sms_spam/reports/gzip_knn/300/train/ + md5: 78ca4529619f53661b14a5d0c4cb99bd.dir + size: 1086010 + nfiles: 513 + - path: sms_spam/reports/gzip_knn/100/symmetry_true/train/ hash: md5 - md5: 3aa09498a167a50051ee2fdf3e46d62d.dir - size: 364240 - nfiles: 349 - grid_search@300-sms_spam-gzip_logistic: + md5: 688b101d8f5ff7b2e466c0e9492e3d6a.dir + size: 107355 + nfiles: 118 + grid_search@100-sms_spam-gzip_knn-false: cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/300 - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/300/study.csv - files.directory=sms_spam files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 - 
++raise_exception=True --config-name gzip_logistic --multirun + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_knn/100/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_knn/100/symmetry_false hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_knn.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.id} + subdir: ${hydra.job.num} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -18679,33 +8623,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} + direction: ${direction} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -18717,36 +8654,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: sms_spam/logs/gzip_logistic/300 + - path: sms_spam/logs/gzip_knn/100/symmetry_false hash: md5 - md5: 627574a996abf0037be2b9d798c0a1f6.dir - size: 1593011 - nfiles: 514 - - path: sms_spam/reports/gzip_logistic/300/train/ + md5: b77d9d0576d484d42fa24401a1d81509.dir + size: 1142222 + nfiles: 513 + - path: sms_spam/reports/gzip_knn/100/symmetry_false/train/ hash: md5 - md5: 
886edc50f38dc580603074bf8dc46835.dir - size: 553839 - nfiles: 363 - grid_search@300-sms_spam-gzip_svc: + md5: 663f10d7b2a3647caecaa978b7b7d983.dir + size: 119667 + nfiles: 117 + grid_search@100-sms_spam-gzip_logistic-true: cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/300/study.csv - files.directory=sms_spam files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/100/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_svc.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -18758,37 +8697,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: 
${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy + directions: ${direction} + metric_names: ${optimizers} output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18802,42 +8737,44 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: sms_spam/logs/gzip_svc/300 + - path: sms_spam/logs/gzip_logistic/100/symmetry_true hash: md5 - md5: 7d9d939af4228ad75b78ee5c347a984a.dir - size: 1513139 - nfiles: 514 - - path: sms_spam/reports/gzip_svc/300/train/ + md5: 517eb16a845fa795e775ef9a68e0a0c6.dir + size: 1234485 + nfiles: 513 + - path: sms_spam/reports/gzip_logistic/100/symmetry_true/train/ hash: md5 - md5: cb8713e4f13494c3c1ab3c93c238d2d7.dir 
- size: 544369 - nfiles: 384 - grid_search@300-truthseeker-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_knn/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/300/study.csv - files.directory=truthseeker files.reports=reports/gzip_knn/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun + md5: 80878d8c169e37e8110005c63a1ee5d0.dir + size: 635861 + nfiles: 326 + grid_search@100-sms_spam-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_sms_spam + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/100/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_logistic/100/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_knn.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_knn.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ sweep: dir: ??? 
- subdir: ${hydra.job.num} + subdir: ${hydra.job.id} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -18849,30 +8786,29 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - direction: ${direction} - storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 - max_failure_rate: 1.0 + storage: sqlite:///optuna.db + n_trials: 128 + n_jobs: 8 params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) + direction: ${direction} + max_failure_rate: 1.0 launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -18884,37 +8820,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: truthseeker/logs/gzip_knn/300 + - path: sms_spam/logs/gzip_logistic/100/symmetry_false hash: md5 - md5: 7fc2fb64903d90052db980e395a73a1b.dir - size: 1418937 - nfiles: 514 - - path: truthseeker/reports/gzip_knn/300/train/ + md5: 394ed9398208455dae29046d35774913.dir + size: 1229002 + nfiles: 513 + - path: sms_spam/reports/gzip_logistic/100/symmetry_false/train/ hash: md5 - md5: 
1b7d0b73ddb24fa30f48675625cad64c.dir - size: 384561 - nfiles: 332 - grid_search@300-truthseeker-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/300 - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/300/study.csv - files.directory=truthseeker files.reports=reports/gzip_logistic/300 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 1bd2509e914115c6a834f630872fe406.dir + size: 628941 + nfiles: 323 + grid_search@100-sms_spam-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/100/symmetry_true + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/100/symmetry_true hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_svc --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_svc.yaml: hydra: run: dir: 
${dataset}/logs/${stage}/ @@ -18926,37 +8863,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -18970,34 +8903,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: truthseeker/logs/gzip_logistic/300 + - path: sms_spam/logs/gzip_svc/100/symmetry_true hash: md5 - md5: 121b624ea70d27aba89bd5448c35580f.dir - size: 1564349 - nfiles: 514 - - path: truthseeker/reports/gzip_logistic/300/train/ + md5: c0931c4a2af0f0b39b4fb699e5ff8850.dir + size: 1246641 + nfiles: 513 + 
- path: sms_spam/reports/gzip_svc/100/symmetry_true/train/ hash: md5 - md5: 7dfeff37b85b221b60c7bad442f21658.dir - size: 557318 - nfiles: 367 - grid_search@300-truthseeker-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=300 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_svc/300 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/300/study.csv - files.directory=truthseeker files.reports=reports/gzip_svc/300 hydra.launcher.n_jobs=-1 + md5: 903ac9307687b483ee7f60f5c5a9e068.dir + size: 543384 + nfiles: 384 + grid_search@100-sms_spam-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_svc/100/symmetry_false + hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/100/study.csv + files.directory=sms_spam files.reports=reports/gzip_svc/100/symmetry_false hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_svc --multirun deps: - path: conf/gzip_svc.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_svc.yaml: hydra: @@ -19019,29 +8954,25 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 
consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.gamma: scale,auto +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -19055,34 +8986,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: truthseeker/logs/gzip_svc/300 + - path: sms_spam/logs/gzip_svc/100/symmetry_false hash: md5 - md5: c1b03e3fa37ca812864d04d3a38216db.dir - size: 1536045 - nfiles: 514 - - path: truthseeker/reports/gzip_svc/300/train/ + md5: f37630902004d80cb73ff229905ca426.dir + size: 1247648 + nfiles: 513 + - path: sms_spam/reports/gzip_svc/100/symmetry_false/train/ hash: md5 - md5: 2cf3648372291b72f9b16020c5c3ad4e.dir - size: 548358 + md5: 58dc217409a236b747a999da2ef4cee1.dir + size: 543731 nfiles: 384 - grid_search@500-ddos-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=500 - data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=ddos/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/500/study.csv - 
files.directory=ddos files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + grid_search@100-truthseeker-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/100/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/100/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -19102,30 +9035,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: 
gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -19137,34 +9066,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: ddos/logs/gzip_knn/500 + - path: truthseeker/logs/gzip_knn/100/symmetry_true hash: md5 - md5: ebb76a3ffe046f5763072644ec826dd9.dir - size: 1693130 - nfiles: 514 - - path: ddos/reports/gzip_knn/500/train/ + md5: 3bb5017fdd0b61fd7b5be594c4dd0b9c.dir + size: 1193938 + nfiles: 513 + - path: truthseeker/reports/gzip_knn/100/symmetry_true/train/ hash: md5 - md5: 00682fbb7c897d179ed788f09be3b1e9.dir - size: 732559 - nfiles: 763 - grid_search@500-kdd_nsl-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_knn/500/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + md5: c0ef5fa56bc9c65e6b6abe943f424be6.dir + size: 227250 + nfiles: 244 + grid_search@100-truthseeker-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_knn/100/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_knn/100/symmetry_false + 
hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 2d0f54d62dcdc05d21ea1730899de0bb + size: 1827 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -19184,30 +9115,26 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -19219,35 +9146,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: kdd_nsl/logs/gzip_knn/500 + - path: truthseeker/logs/gzip_knn/100/symmetry_false hash: md5 - md5: f1d5a2b6b59bc61a8c8d9c52d3a2ad11.dir - size: 1496906 - nfiles: 514 - - path: kdd_nsl/reports/gzip_knn/500/train/ + md5: 77709b1d2f5973a004742328fa7ccf46.dir + size: 1173316 + nfiles: 513 + - 
path: truthseeker/reports/gzip_knn/100/symmetry_false/train/ hash: md5 - md5: bffa17c78573257f1d85dccf5d93fade.dir - size: 388686 - nfiles: 335 - grid_search@500-kdd_nsl-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_kdd_nsl - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=kdd_nsl/logs/gzip_logistic/500 - hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_logistic/500/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 + md5: 0a3609651300c7e4d773fdce2af08984.dir + size: 171434 + nfiles: 160 + grid_search@100-truthseeker-gzip_logistic-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=true hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/100/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_logistic --multirun deps: - path: conf/gzip_logistic.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_logistic.yaml: hydra: @@ -19267,31 +9195,27 @@ stages: sweeper: 
sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.fit_intercept: True,False +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -19305,36 +9229,38 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: kdd_nsl/logs/gzip_logistic/500 + - path: truthseeker/logs/gzip_logistic/100/symmetry_true hash: md5 - md5: 44795a3a64e10088623faf15b87a4548.dir - size: 1666384 - nfiles: 514 - - path: kdd_nsl/reports/gzip_logistic/500/train/ + md5: d6d4b0b157b08346ad1b518d2edfe1f8.dir + size: 1243931 + nfiles: 513 + - path: truthseeker/reports/gzip_logistic/100/symmetry_true/train/ hash: md5 - md5: 607cd0515dec2502b0bd11b6480b5d7b.dir - size: 565896 - nfiles: 357 - grid_search@500-kdd_nsl-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=kdd_nsl dataset=kdd_nsl - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - 
hydra.sweeper.study_name=gzip_svc_kdd_nsl hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=kdd_nsl/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=kdd_nsl/logs/gzip_svc/500/study.csv - files.directory=kdd_nsl files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun + md5: 8f94e7db8385fb9f3973eb19b328397a.dir + size: 639777 + nfiles: 326 + grid_search@100-truthseeker-gzip_logistic-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_logistic + model.init.distance_matrix=null model.init.symmetric=false hydra.sweeper.study_name=gzip_logistic_truthseeker + hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/100/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_logistic/100/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True --config-name gzip_logistic --multirun deps: - - path: conf/gzip_svc.yaml + - path: conf/gzip_logistic.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: da7adfd9b59783b6cd34f750dfcfb1b5 + size: 1993 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_svc.yaml: + conf/gzip_logistic.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -19346,121 +9272,35 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy + directions: ${direction} + metric_names: ${optimizers} output_file: 
${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.solver: saga + +model.init.penalty: l2,l1 + +model.init.tol: tag(log, interval(1e-5, 1e-1)) + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.fit_intercept: True,False + +model.init.class_weight: balanced,None model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_svc - outs: - - path: kdd_nsl/logs/gzip_svc/500 - hash: md5 - md5: 1ed2e3d83e888471981684eaaa3f3b8e.dir - size: 1613038 - nfiles: 514 - - path: kdd_nsl/reports/gzip_svc/500/train/ - hash: md5 - md5: c53dae7497a8f55965cc708c28280f4e.dir - size: 555797 - nfiles: 384 - grid_search@500-sms_spam-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=500 data.sample.test_size=100 
model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_knn/500/study.csv - files.directory=sms_spam files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_knn --multirun - deps: - - path: conf/gzip_knn.yaml - hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_knn.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? - subdir: ${hydra.job.num} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler - seed: 123 - consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper direction: ${direction} - storage: sqlite:///optuna.db - study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 max_failure_rate: 1.0 - params: - model.init.k: 1,3,5,7,11 - +model.init.weights: uniform,distance - +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -19472,37 +9312,38 @@ stages: temp_folder: /tmp/deckard 
max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: sms_spam/logs/gzip_knn/500 + - path: truthseeker/logs/gzip_logistic/100/symmetry_false hash: md5 - md5: 0e5c9c1b5970ef63e76b3adcbb1d9bde.dir - size: 1465483 - nfiles: 514 - - path: sms_spam/reports/gzip_knn/500/train/ + md5: e00ee47514e58ea5f4d39063d194ca52.dir + size: 1288351 + nfiles: 513 + - path: truthseeker/reports/gzip_logistic/100/symmetry_false/train/ hash: md5 - md5: dd14847ddf87817f4410aea70b8fdce3.dir - size: 378991 - nfiles: 331 - grid_search@500-sms_spam-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_sms_spam - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=sms_spam/logs/gzip_logistic/500 - hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_logistic/500/study.csv - files.directory=sms_spam files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 6eaa1b0799b99345f36c3649419ed12f.dir + size: 581607 + nfiles: 364 + grid_search@100-truthseeker-gzip_svc-true: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/100/symmetry_true + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/100/symmetry_true + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + ++raise_exception=True 
--config-name gzip_svc --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_svc.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_svc.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ @@ -19514,37 +9355,33 @@ stages: _target_: database.OptunaStudyDumpCallback storage: ${hydra.sweeper.storage} study_name: ${hydra.sweeper.study_name} - directions: ${direction} - metric_names: ${optimizers} + directions: + - maximize + metric_names: + - accuracy output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 + n_trials: 128 + n_jobs: 8 params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio + +model.init.kernel: rbf,precomputed + +model.init.C: tag(log, interval(1e-3, 1e3)) + +model.init.gamma: scale,auto + +model.init.class_weight: balanced,null model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ 
-19558,34 +9395,36 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_logistic outs: - - path: sms_spam/logs/gzip_logistic/500 + - path: truthseeker/logs/gzip_svc/100/symmetry_true hash: md5 - md5: 6e6d0761de2d778fbdbebd1d547f04a1.dir - size: 1619183 - nfiles: 514 - - path: sms_spam/reports/gzip_logistic/500/train/ + md5: 4d85a297bae6c4437d8775268b8f09aa.dir + size: 1252991 + nfiles: 513 + - path: truthseeker/reports/gzip_svc/100/symmetry_true/train/ hash: md5 - md5: fb78d7f4f526194a09b6561a121f734e.dir - size: 553072 - nfiles: 361 - grid_search@500-sms_spam-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=sms_spam dataset=sms_spam - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_sms_spam hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=sms_spam/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=sms_spam/logs/gzip_svc/500/study.csv - files.directory=sms_spam files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 + md5: e5dbcf02229d9973d0d948ab7291138c.dir + size: 546664 + nfiles: 384 + grid_search@100-truthseeker-gzip_svc-false: + cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker + data.sample.train_size=100 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null + model.init.symmetric=false hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_svc/100/symmetry_false + hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/100/study.csv + files.directory=truthseeker files.reports=reports/gzip_svc/100/symmetry_false + hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name gzip_svc --multirun 
deps: - path: conf/gzip_svc.yaml hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 + md5: ef6089c75166b6acb57ce97a89157ad9 + size: 1905 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_svc.yaml: hydra: @@ -19607,29 +9446,25 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 + consider_endpoints: true + n_startup_trials: 256 + n_ei_candidates: 32 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper study_name: ${dataset}_${model_name}_${stage} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + n_trials: 128 + n_jobs: 8 params: +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 + +model.init.C: tag(log, interval(1e-3, 1e3)) +model.init.gamma: scale,auto +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) direction: ${direction} max_failure_rate: 1.0 launcher: @@ -19643,34 +9478,34 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: sms_spam/logs/gzip_svc/500 + - path: truthseeker/logs/gzip_svc/100/symmetry_false hash: md5 - md5: 4b37a4947b8a27e8b050b76a2252f6d2.dir - size: 1542505 - nfiles: 514 - - path: sms_spam/reports/gzip_svc/500/train/ + md5: b33c39d320d25d5bfbd81006713e3d62.dir + size: 1254591 + nfiles: 513 + - path: truthseeker/reports/gzip_svc/100/symmetry_false/train/ hash: md5 - md5: adfaa61acf833b9b2d823fd944876030.dir - size: 543664 + md5: 13ac657603b4c71f4a17d78cbdc69083.dir + size: 547239 nfiles: 384 - 
grid_search@500-truthseeker-gzip_knn: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_knn_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_knn/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_knn/500/study.csv - files.directory=truthseeker files.reports=reports/gzip_knn/500 hydra.launcher.n_jobs=-1 + grid_search@300-ddos-gzip_knn-true: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + model.init.symmetric=true hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/300/symmetry_true + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv files.directory=ddos + files.reports=reports/gzip_knn/300/symmetry_true hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 ++raise_exception=True --config-name gzip_knn --multirun deps: - path: conf/gzip_knn.yaml hash: md5 - md5: a58015cd6f327e171842b045a2524bfd - size: 2062 + md5: 187b2fd2a0a70b8980acfd256687f05a + size: 1928 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: conf/gzip_knn.yaml: hydra: @@ -19690,11 +9525,11 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true + seed: 123 prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false + consider_endpoints: true n_startup_trials: 10 n_ei_candidates: 24 multivariate: true @@ -19702,18 +9537,15 @@ stages: direction: ${direction} storage: sqlite:///optuna.db study_name: ${dataset}_${model_name}_${stage} - n_trials: 2 - n_jobs: 2 + 
n_trials: 128 + n_jobs: 8 max_failure_rate: 1.0 params: model.init.k: 1,3,5,7,11 +model.init.weights: uniform,distance +model.init.algorithm: brute - model.init.symmetric: True,False - ++model.init.precompute: true model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - ++data.sample.random_state: int(interval(1, 10000)) launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -19725,43 +9557,42 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_knn outs: - - path: truthseeker/logs/gzip_knn/500 + - path: ddos/logs/gzip_knn/300/symmetry_true hash: md5 - md5: 8f89bb6eee2faa7d319f0e667a455558.dir - size: 1449788 - nfiles: 514 - - path: truthseeker/reports/gzip_knn/500/train/ + md5: d23dbd6a384157d616bebeeb6cf41a27.dir + size: 1175564 + nfiles: 513 + - path: ddos/reports/gzip_knn/300/symmetry_true/train/ hash: md5 - md5: 22ad9cc6a9f1fc454ff08e23e1194b6a.dir - size: 382020 - nfiles: 333 - grid_search@500-truthseeker-gzip_logistic: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_logistic - model.init.distance_matrix=null hydra.sweeper.study_name=gzip_logistic_truthseeker - hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 hydra.sweep.dir=truthseeker/logs/gzip_logistic/500 - hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_logistic/500/study.csv - files.directory=truthseeker files.reports=reports/gzip_logistic/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_logistic --multirun + md5: 4c887424c72eed237277e641dfcd47e2.dir + size: 191347 + nfiles: 205 + grid_search@300-ddos-gzip_knn-false: + cmd: python -m deckard.layers.optimise stage=train data=ddos dataset=ddos data.sample.train_size=300 + data.sample.test_size=100 model_name=gzip_knn model.init.distance_matrix=null + 
model.init.symmetric=false hydra.sweeper.study_name=gzip_knn_ddos hydra.sweeper.n_trials=128 + hydra.sweeper.n_jobs=8 hydra.sweep.dir=ddos/logs/gzip_knn/300/symmetry_false + hydra.callbacks.study_dump.output_file=ddos/logs/gzip_knn/300/study.csv files.directory=ddos + files.reports=reports/gzip_knn/300/symmetry_false hydra.launcher.n_jobs=-1 ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + ++raise_exception=True --config-name gzip_knn --multirun deps: - - path: conf/gzip_logistic.yaml + - path: conf/gzip_knn.yaml hash: md5 - md5: 847d4d804fff0b6f2533f90820eebd04 - size: 2205 + md5: 187b2fd2a0a70b8980acfd256687f05a + size: 1928 - path: params.yaml hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 + md5: 486532089f9aed37612260a1f0a2bead + size: 1469 params: - conf/gzip_logistic.yaml: + conf/gzip_knn.yaml: hydra: run: dir: ${dataset}/logs/${stage}/ sweep: dir: ??? - subdir: ${hydra.job.id} + subdir: ${hydra.job.num} callbacks: study_dump: _target_: database.OptunaStudyDumpCallback @@ -19773,118 +9604,27 @@ stages: sweeper: sampler: _target_: optuna.samplers.TPESampler - seed: 123 consider_prior: true - prior_weight: 1.0 - consider_magic_clip: true - consider_endpoints: false - n_startup_trials: 10 - n_ei_candidates: 24 - multivariate: true - _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} - storage: sqlite:///optuna.db - n_jobs: 1 - n_trials: 1 - params: - +model.init.solver: saga - +model.init.penalty: l2,l1,l2,none - +model.init.tol: 1e-4,1e-3,1e-2 - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.fit_intercept: True,False - +model.init.class_weight: balanced,None - model.init.symmetric: True,False - ++model.init.precompute: true - model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio - model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 - launcher: - _target_: 
hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher - n_jobs: 8 - prefer: processes - verbose: 1 - timeout: - pre_dispatch: ${hydra.sweeper.n_jobs} - batch_size: auto - temp_folder: /tmp/deckard - max_nbytes: 100000 - mmap_mode: r - model_name: gzip_logistic - outs: - - path: truthseeker/logs/gzip_logistic/500 - hash: md5 - md5: 536a09eb3f82d03737e3cec6aafdbac8.dir - size: 1605851 - nfiles: 514 - - path: truthseeker/reports/gzip_logistic/500/train/ - hash: md5 - md5: 4560cd0abd0609eebe34c6f578d77f2d.dir - size: 556183 - nfiles: 375 - grid_search@500-truthseeker-gzip_svc: - cmd: python -m deckard.layers.optimise stage=train data=truthseeker dataset=truthseeker - data.sample.train_size=500 data.sample.test_size=100 model_name=gzip_svc model.init.distance_matrix=null - hydra.sweeper.study_name=gzip_svc_truthseeker hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=truthseeker/logs/gzip_svc/500 hydra.callbacks.study_dump.output_file=truthseeker/logs/gzip_svc/500/study.csv - files.directory=truthseeker files.reports=reports/gzip_svc/500 hydra.launcher.n_jobs=-1 - ++raise_exception=True --config-name gzip_svc --multirun - deps: - - path: conf/gzip_svc.yaml - hash: md5 - md5: 957922cb6993eb99866232d944a4a106 - size: 2131 - - path: params.yaml - hash: md5 - md5: 8be0cf0b5f453ffb12b19a1bf1af6468 - size: 1435 - params: - conf/gzip_svc.yaml: - hydra: - run: - dir: ${dataset}/logs/${stage}/ - sweep: - dir: ??? 
- subdir: ${hydra.job.id} - callbacks: - study_dump: - _target_: database.OptunaStudyDumpCallback - storage: ${hydra.sweeper.storage} - study_name: ${hydra.sweeper.study_name} - directions: - - maximize - metric_names: - - accuracy - output_file: ${dataset}/logs/${model_name}/${data.sample.train_size}/study.csv - sweeper: - sampler: - _target_: optuna.samplers.TPESampler seed: 123 - consider_prior: true prior_weight: 1.0 consider_magic_clip: true - consider_endpoints: false + consider_endpoints: true n_startup_trials: 10 n_ei_candidates: 24 multivariate: true _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper - study_name: ${dataset}_${model_name}_${stage} + direction: ${direction} storage: sqlite:///optuna.db - n_jobs: 2 - n_trials: 2 + study_name: ${dataset}_${model_name}_${stage} + n_trials: 128 + n_jobs: 8 + max_failure_rate: 1.0 params: - +model.init.kernel: rbf,precomputed - +model.init.C: 1e-2,1e-1,1e0,1e1,1e2 - +model.init.gamma: scale,auto - +model.init.class_weight: balanced,null - model.init.symmetric: True,False - ++model.init.precompute: true + model.init.k: 1,3,5,7,11 + +model.init.weights: uniform,distance + +model.init.algorithm: brute model.init.metric: gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio model_name: ${model_name} - data.sample.random_state: int(interval(1, 10000)) - direction: ${direction} - max_failure_rate: 1.0 launcher: _target_: hydra_plugins.hydra_joblib_launcher.joblib_launcher.JoblibLauncher n_jobs: 8 @@ -19896,15 +9636,14 @@ stages: temp_folder: /tmp/deckard max_nbytes: 100000 mmap_mode: r - model_name: gzip_svc outs: - - path: truthseeker/logs/gzip_svc/500 + - path: ddos/logs/gzip_knn/300/symmetry_false hash: md5 - md5: 10808502e0c1c7d780ea6178ae53c19c.dir - size: 1568093 - nfiles: 514 - - path: truthseeker/reports/gzip_svc/500/train/ + md5: 8ed5c114922082086fcec773797c4983.dir + size: 1159774 + nfiles: 513 + - path: ddos/reports/gzip_knn/300/symmetry_false/train/ hash: md5 - 
md5: 1fb9105254065d6d93e9647e12d650b2.dir - size: 547905 - nfiles: 384 + md5: 4122b0aa41babba1d8a8e141206a1c1a.dir + size: 167245 + nfiles: 167 diff --git a/examples/gzip/dvc.yaml b/examples/gzip/dvc.yaml index b7d4c8d6..367523d2 100644 --- a/examples/gzip/dvc.yaml +++ b/examples/gzip/dvc.yaml @@ -71,112 +71,12 @@ stages: - params.yaml - raw_data/ # Raw data ############################################################################## - test_each_dataset: - matrix: - dataset : [ddos, truthseeker, sms_spam, kdd_nsl] - model_name : [gzip_knn, gzip_svc, gzip_logistic] - cmd : >- - python -m deckard.layers.optimise - stage=train - files.name=${item.model_name} - data.sample.train_size=100 - files.directory=${item.dataset} - data=${item.dataset} - dataset=${item.dataset} - model_name=${item.model_name} - model=${item.model_name} - hydra.run.dir=${item.dataset}/logs/train/${item.model_name} - ++raise_exception=True - deps: - - params.yaml - - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} - outs: - - ${item.dataset}/${files.reports}/train/${item.model_name}/${files.score_dict_file} - - ${item.dataset}/logs/train/${item.model_name} - params: - - data - - model - - scorers - - files - - dataset - - model_name - - device_id - ############################################################################## - test_each_metric: - matrix: - metric: [gzip, zstd, pkl, bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio] - model : [gzip_knn,] # gzip_svc, gzip_logistic - dataset : [kdd_nsl] #truthseeker, sms_spam, ddos - train_size: [20] #100, 1000, 10000 - cmd : >- - python -m deckard.layers.optimise - stage=test_each_metric - files.name=${item.model}/${item.metric}/${item.train_size} - files.directory=${item.dataset} - data=${item.dataset} - data.sample.train_size=${item.train_size} - dataset=${item.dataset} - model=${item.model} - model_name=${model_name} - model.init.metric=${item.metric} - model.init.m=-1 - 
hydra.run.dir=${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size} - ++raise_exception=True - deps: - - params.yaml - - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} - outs: - - ${item.dataset}/${files.reports}/test_each_metric/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file} - - ${item.dataset}/logs/test_each_metric/${item.model}/${item.metric}/${item.train_size} - params: - - data - - model - - scorers - - files - - dataset - - model_name - - device_id - # ############################################################################## - test_each_model: - matrix: - metric: [gzip] #, zstd, pkl, bz2, lzma,levenshtein, ratio, hamming, jaro, jaro_winkler, seqratio - model : [gzip_knn, gzip_svc, gzip_logistic] - dataset : [kdd_nsl] #truthseeker, sms_spam, ddos - train_size: [20] #100, 1000, 10000 - cmd : >- - python -m deckard.layers.optimise - stage=test_each_model - files.name=${item.model}/${item.metric}/${item.train_size} - files.directory=${item.dataset} - data=${item.dataset} - data.sample.train_size=${item.train_size} - dataset=${item.dataset} - model=${item.model} - model_name=${model_name} - model.init.metric=${item.metric} - model.init.m=-1 - hydra.run.dir=${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size} - ++raise_exception=True - deps: - - params.yaml - - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} - outs: - - ${item.dataset}/${files.reports}/test_each_model/${item.model}/${item.metric}/${item.train_size}/${files.score_dict_file} - - ${item.dataset}/logs/test_each_model/${item.model}/${item.metric}/${item.train_size} - params: - - data - - model - - scorers - - files - - dataset - - model_name - - device_id - ############################################################################## grid_search: matrix: train_size: [20, 100, 300, 500] # dataset : [ddos, kdd_nsl, sms_spam, truthseeker] # configs: 
[gzip_knn, gzip_logistic, gzip_svc] + symmetric : [True, False] cmd: >- python -m deckard.layers.optimise stage=train @@ -186,14 +86,17 @@ stages: data.sample.test_size=100 model_name=${item.configs} model.init.distance_matrix=null + model.init.symmetric=${item.symmetric} hydra.sweeper.study_name=${item.configs}_${item.dataset} hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=${item.dataset}/logs/${item.configs}/${item.train_size} + hydra.sweep.dir=${item.dataset}/logs/${item.configs}/${item.train_size}/symmetry_${item.symmetric} hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.configs}/${item.train_size}/study.csv files.directory=${item.dataset} - files.reports=${files.reports}/${item.configs}/${item.train_size} + files.reports=${files.reports}/${item.configs}/${item.train_size}/symmetry_${item.symmetric} hydra.launcher.n_jobs=-1 + ++data.sample.random_state=1,2,3,4,5,6,7,8,9,10 + model.init.metric=gzip,lzma,bz2,pkl,zstd,levenshtein,ratio,hamming,jaro,jaro_winkler,seqratio ++raise_exception=True --config-name ${item.configs} --multirun @@ -201,15 +104,17 @@ stages: - params.yaml - conf/${item.configs}.yaml outs: - - ${item.dataset}/logs/${item.configs}/${item.train_size} - - ${item.dataset}/${files.reports}/${item.configs}/${item.train_size}/train/: + - ${item.dataset}/logs/${item.configs}/${item.train_size}/symmetry_${item.symmetric}: + cache: true + persist: true + push: true + - ${item.dataset}/${files.reports}/${item.configs}/${item.train_size}/symmetry_${item.symmetric}/train/: cache: true persist: true push: true params: - conf/${item.configs}.yaml: - hydra - - model_name ############################################################################## # find_best_model: # This isn't actually used in later steps, but it's handy to have these configs ready for a line search instead of a massive grid search # matrix: @@ -221,43 +126,12 @@ stages: # python -m deckard.layers.find_best --storage sqlite:///optuna.db --study_name 
${item.model}_${item.dataset} --config_subdir model --params_file best_${item.model}_${item.dataset} --default_config ${item.model} # outs: # - conf/model/best_${item.model}_${item.dataset}.yaml - ############################################################################# - test_each_method: - matrix: - dataset : [ddos] # kdd_nsl, truthseeker, sms_spam, - method: [medoid, sum, svc, hardness, nearmiss,random,knn] - cmd : >- - python -m deckard.layers.optimise - stage=train - +model.init.sampling_method=${item.method} - model.init.m=3 - data.sample.train_size=100 - files.name=${item.method} - files.directory=${item.dataset} - data=${item.dataset} - dataset=${item.dataset} - model_name=${item.method} - hydra.run.dir=${item.dataset}/logs/method/${item.method} - ++raise_exception=True - deps: - - params.yaml - - ${files.directory}/${files.reports}/train/default/${files.score_dict_file} - outs: - - ${item.dataset}/${files.reports}/train/${item.method}/${files.score_dict_file} - - ${item.dataset}/logs/method/${item.method} - params: - - data - - model - - scorers - - files - - dataset - - model_name - - device_id ############################################################################## condense: matrix: dataset : [ddos, kdd_nsl, truthseeker, sms_spam,] # kdd_nsl, truthseeker, sms_spam, model_name : [knn, svc, logistic] + ratio : [1, .9, .8, .7, .6, .5, .4, .3, .2, .1] deps: - params.yaml - conf/condense_${item.model_name}.yaml @@ -270,19 +144,28 @@ stages: data.sample.test_size=100 model_name=condensed_${item.model_name} model=gzip_${item.model_name} + ++model.init.m=${item.ratio} + ++model.init.distance_matrix=${item.dataset}/models/${item.model_name}/${item.ratio}/distance_matrix.npz files.directory=${item.dataset} - files.reports=${files.reports}/condense/${item.model_name}/ + files.reports=${files.reports}/condense/${item.model_name}/${item.ratio}/ hydra.sweeper.study_name=condense_${item.model_name}_${item.dataset} - hydra.sweeper.n_trials=1024 + 
hydra.sweeper.n_trials=128 hydra.sweeper.n_jobs=8 - hydra.sweep.dir=${item.dataset}/logs/condense/${item.model_name}/ + hydra.sweep.dir=${item.dataset}/logs/condense/${item.model_name}/${item.ratio}/ hydra.callbacks.study_dump.output_file=${item.dataset}/logs/${item.model_name}/study.csv hydra.launcher.n_jobs=-1 --config-name condense_${item.model_name} --multirun outs: - - ${item.dataset}/logs/condense/${item.model_name}/ - - ${item.dataset}/${files.reports}/condense/${item.model_name}/: + - ${item.dataset}/logs/condense/${item.model_name}/${item.ratio}: + cache: true + persist: true + push: true + - ${item.dataset}/${files.reports}/condense/${item.model_name}/${item.ratio}: + cache: true + persist: true + push: true + - ${item.dataset}/models/${item.model_name}/${item.ratio}/: cache: true persist: true push: true @@ -291,7 +174,7 @@ stages: - hydra compile: matrix: - dataset : [kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic] deps: - ${item.dataset}/${files.reports}/${item.stage}/ @@ -304,7 +187,7 @@ stages: ############################################################################## clean: matrix: - dataset : [kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] stage : [gzip_knn, gzip_svc, gzip_logistic, condense/knn, condense/svc, condense/logistic] deps: - ${item.dataset}/${files.reports}/${item.stage}.csv @@ -318,10 +201,12 @@ stages: params: - conf/clean.yaml: - replace + - drop_values + - replace_cols ############################################################################## merge: matrix: - dataset : [kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] deps: - ${item.dataset}/plots/clean/gzip_knn.csv - ${item.dataset}/plots/clean/gzip_logistic.csv @@ -338,7 +223,7 @@ stages: ############################################################################## merge_condense: matrix: - dataset : 
[kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] deps: - ${item.dataset}/plots/clean/condense/knn.csv - ${item.dataset}/plots/clean/condense/logistic.csv @@ -355,7 +240,7 @@ stages: ############################################################################## plot: matrix: - dataset : [kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] cmd: >- python -m deckard.layers.plots --path ${item.dataset}/plots/ @@ -363,6 +248,7 @@ stages: -c conf/plots.yaml deps: - ${item.dataset}/plots/merged.csv + - conf/plots.yaml plots: - ${item.dataset}/plots/${line_plot[0].file} - ${item.dataset}/plots/${line_plot[1].file} @@ -379,7 +265,7 @@ stages: ############################################################################## plot_condense: matrix: - dataset : [kdd_nsl, sms_spam, ddos] + dataset : [kdd_nsl, sms_spam, ddos, truthseeker] cmd: >- python -m deckard.layers.plots --path ${item.dataset}/plots/ @@ -387,22 +273,72 @@ stages: -c conf/condensed_plots.yaml deps: - ${item.dataset}/plots/condensed_merged.csv + - conf/condensed_plots.yaml plots: - - ${item.dataset}/plots/sampling_method_vs_accuracy.pdf - - ${item.dataset}/plots/sampling_method_vs_train_time.pdf - - ${item.dataset}/plots/sampling_method_vs_predict_time.pdf + - ${item.dataset}/plots/condensing_method_vs_accuracy.pdf + - ${item.dataset}/plots/condensing_method_vs_train_time.pdf + - ${item.dataset}/plots/condensing_method_vs_predict_time.pdf params: - conf/condensed_plots.yaml: + - cat_plot + ############################################################################## + merge_datasets: + cmd: >- + python merge.py + --big_dir . + --little_dir . 
+ --data_file sms_spam/plots/merged.csv + --little_dir_data_file kdd_nsl/plots/merged.csv ddos/plots/merged.csv truthseeker/plots/merged.csv kdd_nsl/plots/condensed_merged.csv ddos/plots/condensed_merged.csv truthseeker/plots/condensed_merged.csv sms_spam/plots/condensed_merged.csv + --output_folder combined/plots/ + --output_file merged.csv + deps: + - sms_spam/plots/merged.csv + - kdd_nsl/plots/merged.csv + - ddos/plots/merged.csv + - truthseeker/plots/merged.csv + outs: + - combined/plots/merged.csv + ############################################################################## + plot_merged: + cmd: >- + python -m deckard.layers.plots + --path combined/plots/ + --file combined/plots/merged.csv + -c conf/merged_plots.yaml + deps: + - combined/plots/merged.csv + - conf/merged_plots.yaml + plots: + - combined/plots/compressor_metric_vs_accuracy.pdf + - combined/plots/compressor_metric_vs_train_time.pdf + - combined/plots/compressor_metric_vs_predict_time.pdf + - combined/plots/string_metric_vs_accuracy.pdf + - combined/plots/string_metric_vs_train_time.pdf + - combined/plots/string_metric_vs_predict_time.pdf + - combined/plots/symmetric_models_vs_accuracy.pdf + - combined/plots/symmetric_models_vs_train_time.pdf + - combined/plots/symmetric_models_vs_predict_time.pdf + - combined/plots/condensing_methods_vs_accuracy.pdf + - combined/plots/condensing_methods_vs_train_time.pdf + - combined/plots/condensing_methods_vs_predict_time.pdf + - combined/plots/models_vs_accuracy.pdf + - combined/plots/models_vs_train_time.pdf + - combined/plots/models_vs_predict_time.pdf + params: + - conf/merged_plots.yaml: + - cat_plot + - conf/merged_plots.yaml: - line_plot - # copy: - # matrix: - # dataset : [kdd_nsl, truthseeker, sms_spam, ddos] - # cmd: >- - # rm -rf ~/Gzip-KNN/figs/${item.dataset}/ && - # mkdir -p ~/Gzip-KNN/figs/${item.dataset}/ && - # cp -r ${item.dataset}/plots/* ~/Gzip-KNN/figs/${item.dataset}/ - # deps: - # - ${item.dataset}/plots/ + copy: + matrix: + dataset : 
[kdd_nsl, truthseeker, sms_spam, ddos, combined] + cmd: >- + rm -rf ~/Gzip-KNN/figs/${item.dataset}/ && + mkdir -p ~/Gzip-KNN/figs/${item.dataset}/ && + cp -r ${item.dataset}/plots/* ~/Gzip-KNN/figs/${item.dataset}/ && + rm -rf ~/Gzip-KNN/figs/${item.dataset}/.gitignore + deps: + - ${item.dataset}/plots/ # ############################################################################## # # attack: # # cmd: python -m deckard.layers.experiment attack diff --git a/examples/gzip/gzip_classifier.py b/examples/gzip/gzip_classifier.py index 49d4e159..fb4aef27 100644 --- a/examples/gzip/gzip_classifier.py +++ b/examples/gzip/gzip_classifier.py @@ -16,6 +16,7 @@ # python -m pip install numpy scikit-learn tqdm scikit-learn-extra pandas imbalanced-learn import numpy as np +import warnings import gzip from tqdm import tqdm from pathlib import Path @@ -33,6 +34,7 @@ from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression from sklearn_extra.cluster import KMedoids +from sklearn.exceptions import DataConversionWarning from imblearn.under_sampling import ( CondensedNearestNeighbour, NearMiss, @@ -46,43 +48,46 @@ from batchMixin import BatchedMixin +warnings.simplefilter(action="ignore", category=FutureWarning) +warnings.simplefilter(action="ignore", category=UserWarning) + logger = logging.getLogger(__name__) -def _gzip_compressor(x): +def _gzip_len(x): return len(gzip.compress(str(x).encode())) -def _lzma_compressor(x): +def _lzma_len(x): import lzma return len(lzma.compress(str(x).encode())) -def _bz2_compressor(x): +def _bz2_len(x): import bz2 return len(bz2.compress(str(x).encode())) -def _zstd_compressor(x): +def _zstd_len(x): import zstd return len(zstd.compress(str(x).encode())) -def _pickle_compressor(x): +def _pickle_len(x): import pickle return len(pickle.dumps(x)) compressors = { - "gzip": _gzip_compressor, - "lzma": _lzma_compressor, - "bz2": _bz2_compressor, - "zstd": _zstd_compressor, - "pkl": _pickle_compressor, + "gzip": _gzip_len, + "lzma": 
_lzma_len, + "bz2": _bz2_len, + "zstd": _zstd_len, + "pkl": _pickle_len, } @@ -102,15 +107,15 @@ def ncd( float: The normalized compression distance between x1 and x2 """ - compressor = ( + compressor_len = ( compressors[method] if method in compressors.keys() else compressors["gzip"] ) x1 = str(x1) x2 = str(x2) - Cx1 = compressor(x1) if cx1 is None else cx1 - Cx2 = compressor(x2) if cx2 is None else cx2 + Cx1 = compressor_len(x1) if cx1 is None else cx1 + Cx2 = compressor_len(x2) if cx2 is None else cx2 x1x2 = " ".join([x1, x2]) - Cx1x2 = compressor(x1x2) + Cx1x2 = compressor_len(x1x2) min_ = min(Cx1, Cx2) max_ = max(Cx1, Cx2) ncd = (Cx1x2 - min_) / max_ @@ -131,6 +136,17 @@ def ncd( **string_metrics, } +all_condensers = [ + "sum", + "mean", + "medoid", + "random", + "knn", + "svc", + "hardness", + "nearmiss", +] + def _calculate_string_distance(x1, x2, method): x1 = str(x1) @@ -182,7 +198,6 @@ def __init__( distance_matrix=None, metric="gzip", symmetric=False, - precompute=True, **kwargs, ): """ @@ -197,24 +212,23 @@ def __init__( If a path is provided, the file will be loaded. If an array is provided, it will be used directly. Default is None. symmetric (bool): If True, the distance matrix will be treated as symmetric. Default is False. - precompute (bool): If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_. Raises: ValueError: If distance_matrix is not a path to a numpy file or a numpy array. NotImplementedError: If the metric is not supported. 
""" kwarg_string = str([f"{key}={value}" for key, value in kwargs.items()]) - logger.info( - f"Initializing GzipClassifier with m={m}, method={sampling_method}, distance_matrix={distance_matrix}, metric={metric}, symmetric={symmetric}, precompute={precompute}, {kwarg_string}", + logger.debug( + f"Initializing GzipClassifier with m={m}, method={sampling_method}, distance_matrix={distance_matrix}, metric={metric}, symmetric={symmetric}, {kwarg_string}", ) self.m = m self.sampling_method = sampling_method if metric in compressors.keys(): - logger.info(f"Using NCD metric with {metric} compressor.") + logger.debug(f"Using NCD metric with {metric} compressor.") self._distance = ncd self.metric = metric elif metric in string_metrics.keys(): - logger.info(f"Using {metric} metric") + logger.debug(f"Using {metric} metric") self._distance = _calculate_string_distance self.metric = metric else: @@ -231,7 +245,6 @@ def __init__( self._calculate_distance_matrix = ( self._calculate_rectangular_distance_matrix ) - self.precompute = precompute # If True, the distance matrix will be precomputed and stored in self.distance_matrix during the fit method and a sklearn KNeighborsClassifier object will be created and stored in self.clf_. 
self.distance_matrix = distance_matrix for key, value in kwargs.items(): setattr(self, key, value) @@ -258,6 +271,7 @@ def _calculate_rectangular_distance_matrix( desc="Calculating asymmetric distance matrix.", leave=False, dynamic_ncols=True, + position=2, ) Cx1 = Cx1 if Cx1 is not None else [None] * len(x1) Cx2 = Cx2 if Cx2 is not None else [None] * len(x2) @@ -310,6 +324,7 @@ def _calculate_lower_triangular_distance_matrix( desc="Calculating symmetric distance metrix.", leave=False, dynamic_ncols=True, + position=0, ) Cx1 = Cx1 if Cx1 is not None else [None] * len(x1) Cx2 = Cx2 if Cx2 is not None else [None] * len(x2) @@ -420,8 +435,20 @@ def _prepare_training_matrix(self, n_jobs=-1): n_jobs=n_jobs, ) self._save_distance_matrix(self.distance_matrix, distance_matrix) - elif isinstance(self.distance_matrix, np.ndarray): + elif isinstance(self.distance_matrix, np.ndarray) and len( + self.distance_matrix, + ) == len(self.X_): distance_matrix = self.distance_matrix + elif isinstance(self.distance_matrix, np.ndarray) and len( + self.distance_matrix, + ) != len(self.X_): + distance_matrix = self._calculate_distance_matrix( + self.X_, + self.X_, + Cx1=self.Cx_, + Cx2=self.Cx_, + n_jobs=n_jobs, + ) elif isinstance(self.distance_matrix, type(None)): distance_matrix = self._calculate_distance_matrix( self.X_, @@ -434,6 +461,15 @@ def _prepare_training_matrix(self, n_jobs=-1): raise ValueError( f"distance_matrix must be a path to a numpy file or a numpy array, got {type(self.distance_matrix)}", ) + assert ( + distance_matrix.shape[0] == distance_matrix.shape[1] + ), f"Distance matrix must be square, got {distance_matrix.shape}" + assert ( + len(self.X_) == distance_matrix.shape[0] + ), f"Expected len(X) == {distance_matrix.shape[0]}" + assert ( + len(self.y_) == distance_matrix.shape[0] + ), f"Expected len(y) == {distance_matrix.shape[0]}" return distance_matrix def _find_best_samples(self, method="medoid", n_jobs=-1): @@ -521,15 +557,18 @@ def _find_best_samples(self, 
method="medoid", n_jobs=-1): distance_matrix, columns=list(range(len(distance_matrix))), ) + distance_matrix, y = model.fit_resample(distance_matrix, y) y = pd.DataFrame(y, columns=["y"]) y.index = list(range(len(y))) - distance_matrix, y = model.fit_resample(distance_matrix, y) indices = y.index[: m * n_classes] else: raise NotImplementedError(f"Method {method} not supported") + + if len(indices) > len(self.X_): + indices = indices[: len(self.X_)] return indices - def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1): + def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1, X_test=None, y_test=None): """Fit the model using X as training data and y as target values. If self.m is not -1, the best m samples will be selected using the method specified in self.sampling_method. Args: @@ -540,7 +579,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1): GzipClassifier: The fitted model """ assert len(X) == len(y), f"Expected {len(X)} == {len(y)}" - logger.info(f"Fitting with X of shape {X.shape} and y of shape {y.shape}") + logger.debug(f"Fitting with X of shape {X.shape} and y of shape {y.shape}") self.X_ = np.array(X) if not isinstance(X, np.ndarray) else X y = np.array(y) if not isinstance(y, np.ndarray) else y if len(np.squeeze(y).shape) == 1: @@ -554,7 +593,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1): flat_y = np.argmax(y, axis=1) counts = np.bincount(flat_y) self.counts_ = counts - logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}") + logger.debug(f"Num Classes: {self.n_classes_}, counts: {counts}") self.n_features_ = X.shape[1] if len(X.shape) > 1 else 1 self.classes_ = range(len(unique_labels(y))) @@ -579,19 +618,18 @@ def fit(self, X: np.ndarray, y: np.ndarray, n_jobs=-1): elif self.m == -1: distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) self.distance_matrix = distance_matrix - elif self.m is None or self.m == 0: - pass else: raise ValueError( f"Expected {self.m} to be -1, 0, a positive integer or a float 
between 0 and 1. Got type {type(self.m)}", ) - if self.precompute is True: - self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) - self.clf_ = self.clf_.fit(self.distance_matrix, self.y_) - else: - raise NotImplementedError( - f"Precompute {self.precompute} not supported for type(self.clf_) {type(self.clf_)}", - ) + self.distance_matrix = self._prepare_training_matrix(n_jobs=n_jobs) + with warnings.catch_warnings(): + warnings.filterwarnings("error") + try: + self.clf_ = self.clf_.fit(self.distance_matrix, self.y_) + except DataConversionWarning: + y = np.ravel(self.y_) + self.clf_ = self.clf_.fit(self.distance_matrix, y) return self def _set_best_indices(self, indices): @@ -607,11 +645,9 @@ def _set_best_indices(self, indices): indices ] # select the transposed columns at the indices self.distance_matrix = distance_matrix.T # transpose the matrix again - logger.info( + logger.debug( f"Selected {len(self.X_)} samples using method {self.sampling_method}.", ) - counts = np.bincount(np.argmax(self.y_, axis=1)) - logger.info(f"Num Classes: {self.n_classes_}, counts: {counts}") assert len(self.X_) == len( self.y_, ), f"Expected {len(self.X_)} == {len(self.y_)}" @@ -630,7 +666,7 @@ def predict(self, X: np.ndarray): np.ndarray: The predicted class labels """ check_is_fitted(self) - logger.info(f"Predicting with X of shape {X.shape}") + logger.debug(f"Predicting with X of shape {X.shape}") if self.metric in compressors.keys(): compressor = compressors[self.metric] Cx2 = Parallel(n_jobs=-1)( @@ -687,7 +723,8 @@ def score(self, X: np.ndarray, y: np.ndarray): return accuracy_score(y, y_pred) -class BatchedGzipClassifier(GzipClassifier, BatchedMixin): +class BatchedGzipClassifier(BatchedMixin, GzipClassifier): + pass @@ -700,7 +737,6 @@ def __init__( distance_matrix=None, metric="gzip", symmetric=False, - precompute=True, **kwargs, ): super().__init__( @@ -709,7 +745,6 @@ def __init__( distance_matrix=distance_matrix, metric=metric, symmetric=symmetric, - 
precompute=precompute, **kwargs, ) self.clf_ = KNeighborsClassifier(n_neighbors=k, metric="precomputed", **kwargs) @@ -726,7 +761,7 @@ def predict(self, X: np.ndarray, n_jobs=-1): """ check_is_fitted(self) - logger.info(f"Predicting with X of shape {X.shape}") + logger.debug(f"Predicting with X of shape {X.shape}") # Pre-compress samples not working if self.metric in compressors.keys(): compressor = compressors[self.metric] @@ -760,31 +795,11 @@ def predict(self, X: np.ndarray, n_jobs=-1): len(X), len(self.X_), ), f"Expected {distance_matrix.shape} == ({len(X)}, {len(self.X_)})" - y_pred = [] - if self.precompute is True: - y_pred = self.clf_.predict(distance_matrix) - else: - for i in tqdm( - range(len(X)), - desc="Predicting", - leave=False, - total=len(X), - dynamic_ncols=True, - ): - # Sort the distances and get the nearest k samples - sorted_idx = np.argsort(distance_matrix[i]) - # Get the first k samples - nearest_k = sorted_idx[: self.k] - # Get the labels of the nearest samples - nearest_labels = list(self.y_[nearest_k]) - # predict class - unique, counts = np.unique(nearest_labels, return_counts=True) - # Get the most frequent label - y_pred.append(unique[np.argmax(counts)]) + y_pred = self.clf_.predict(distance_matrix) return y_pred -class BatchedGzipKNN(GzipKNN, BatchedMixin): +class BatchedGzipKNN(BatchedMixin, GzipKNN): pass @@ -796,14 +811,11 @@ def __init__( distance_matrix=None, metric="gzip", symmetric=False, - precompute=True, **kwargs, ): - self.precompute = precompute clf = LogisticRegression(**kwargs) super().__init__( clf_=clf, - precompute=precompute, sampling_method=sampling_method, m=m, distance_matrix=distance_matrix, @@ -813,7 +825,7 @@ def __init__( ) -class BatchedGzipLogisticRegressor(GzipLogisticRegressor, BatchedMixin): +class BatchedGzipLogisticRegressor(BatchedMixin, GzipLogisticRegressor): pass @@ -826,14 +838,11 @@ def __init__( distance_matrix=None, metric="gzip", symmetric=False, - precompute=True, **kwargs, ): - 
self.precompute = precompute clf = SVC(kernel=kernel, **kwargs) super().__init__( clf_=clf, - precompute=precompute, sampling_method=sampling_method, m=m, distance_matrix=distance_matrix, @@ -883,10 +892,13 @@ def test_model( ) -> dict: """ Args: - X (np.ndarray): The input data - y (np.ndarray): The target labels - train_size (int): The number of samples to use for training. Default is 100. - test_size (int): The number of samples to use for testing. Default is 100. + X_train (np.ndarray): The input data + X_test (np.ndarray): The test data + y_train (np.ndarray): The target labels + y_test (np.ndarray): The test labels + model_type (str): The type of model to use. Choices are "knn", "logistic", "svc". + optimizer (str): The metric to optimize. Choices are "accuracy", "f1", "precision", "recall". + batched (bool): If True, a batched model will be used. Default is False. **kwargs: Additional keyword arguments to pass to the GzipClassifier Returns: dict: A dictionary containing the accuracy, train_time, and pred_time @@ -898,7 +910,8 @@ def test_model( alias = model_scorers[model_type] scorer = scorers[alias] start = time.time() - model.fit(X_train, y_train) + + model.fit(X_train, y_train, X_test=X_test, y_test=y_test) check_is_fitted(model) end = time.time() train_time = end - start @@ -909,7 +922,7 @@ def test_model( score = round(scorer(y_test, predictions), 3) print(f"Training time: {train_time}") print(f"Prediction time: {pred_time}") - print(f"{alias} is: {score}") + print(f"{alias.capitalize()} is: {score}") score_dict = { f"{alias.lower()}": score, "train_time": train_time, @@ -935,14 +948,9 @@ def load_data(dataset, precompressed): LabelEncoder().fit(y).transform(y) ) # Turns the labels "alt.atheism" and "talk.religion.misc" into 0 and 1 elif dataset == "kdd_nsl": - df = pd.read_csv("raw_data/kdd_nsl.csv") - y = df["label"] - X = df.drop("label", axis=1) - elif dataset == "kdd_nsl": - df = pd.read_csv("raw_data/kdd_nsl.csv") + df = 
pd.read_csv("raw_data/kdd_nsl_undersampled_10000.csv") y = df["label"] X = df.drop("label", axis=1) - X = np.array(X) elif dataset == "make_classification": X, y = make_classification( n_samples=1000, @@ -952,7 +960,7 @@ def load_data(dataset, precompressed): ) y = LabelEncoder().fit(y).transform(y) elif dataset == "truthseeker": - df = pd.read_csv("raw_data/truthseeker.csv") + df = pd.read_csv("raw_data/truthseeker_undersampled_8000.csv") y = df["BotScoreBinary"] X = df.drop("BotScoreBinary", axis=1) elif dataset == "sms-spam": @@ -1002,7 +1010,7 @@ def main(args: argparse.Namespace): Args: args (argparse.Namespace): The command line arguments Usage: - python gzip_classifier.py --compressor gzip --k 3 --m 100 --method random --distance_matrix distance_matrix --dataset kdd_nsl + python python gzip_classifier.py --metric gzip --m 10 --sampling_method svc --dataset kdd_nsl k=3 """ X, y = load_data(dataset=args.dataset, precompressed=args.precompressed) @@ -1022,28 +1030,96 @@ def main(args: argparse.Namespace): kwarg_args = params.pop("kwargs") # conver list of key-value pairs to dictionary kwarg_args = dict([arg.split("=") for arg in kwarg_args]) + for k, v in kwarg_args.items(): + # Typecast the values to the correct type + try: + kwarg_args[k] = eval(v) + except: # noqa E722 + kwarg_args[k] = v params.update(**kwarg_args) - params["precompute"] = True X = np.array(X) if not isinstance(X, np.ndarray) else X y = np.array(y) if not isinstance(y, np.ndarray) else y test_model(X_train, X_test, y_train, y_test, **params) parser = argparse.ArgumentParser() -parser.add_argument("--model_type", type=str, default="knn") -parser.add_argument("--precompute", action="store_true") -parser.add_argument("--symmetric", action="store_true") -parser.add_argument("--metric", type=str, default="gzip", choices=all_metrics) -parser.add_argument("--m", type=int, default=-1) -parser.add_argument("--sampling_method", type=str, default="random") -parser.add_argument("--distance_matrix", 
type=str, default=None) -parser.add_argument("--dataset", type=str, default="kdd_nsl") -parser.add_argument("--train_size", type=int, default=100) -parser.add_argument("--test_size", type=int, default=100) -parser.add_argument("--optimizer", type=str, default="accuracy") -parser.add_argument("--precompressed", action="store_true") -parser.add_argument("--random_state", type=int, default=42) -parser.add_argument("kwargs", nargs=argparse.REMAINDER) +parser.add_argument( + "--model_type", + type=str, + default="knn", + help="The type of model to use. Choices are knn, logistic, svc", +) +parser.add_argument( + "--symmetric", + action="store_true", + help="If True, the distance matrix will be treated as symmetric. Default is False.", +) +parser.add_argument( + "--metric", + type=str, + default="gzip", + choices=all_metrics, + help=f"The metric used to calculate the distance between samples. Choices are {list(all_metrics.keys())}", +) +parser.add_argument( + "--m", + type=int, + default=-1, + help="The number of best samples to use. If -1, all samples will be used.", +) +parser.add_argument( + "--sampling_method", + type=str, + default="random", + help=f"The method used to select the best training samples. Choices are {all_condensers}", +) +parser.add_argument( + "--distance_matrix", + type=str, + default=None, + help="The path to a numpy array representing the distance matrix. If a path is provided, the file will be loaded. Default is None.", +) +parser.add_argument( + "--dataset", + type=str, + default="kdd_nsl", + help="The dataset to use. Choices are 20newsgroups, kdd_nsl, make_classification, truthseeker, sms-spam, ddos.", +) +parser.add_argument( + "--train_size", + type=int, + default=100, + help="The number of samples to use for training. Default is 100.", +) +parser.add_argument( + "--test_size", + type=int, + default=100, + help="The number of samples to use for testing. 
Default is 100.", +) +parser.add_argument( + "--optimizer", + type=str, + default="accuracy", + help="The metric to use for optimization. Default is accuracy.", +) +parser.add_argument( + "--precompressed", + action="store_true", + help="If True, the data will be precompressed using gzip.", +) +parser.add_argument( + "--random_state", + type=int, + default=42, + help="The random state to use. Default is 42.", +) +parser.add_argument( + "kwargs", + nargs=argparse.REMAINDER, + help="Additional keyword arguments to pass to the GzipClassifier", +) + if __name__ == "__main__": args = parser.parse_args() diff --git a/examples/gzip/objective.py b/examples/gzip/objective.py new file mode 100644 index 00000000..39e4185e --- /dev/null +++ b/examples/gzip/objective.py @@ -0,0 +1,54 @@ +import optuna +from gzip_classifier import all_metrics + + +def objective(trial: optuna.Trial): + model_type = trial.suggest_categorical("model_type", ["knn", "logistic", "svc"]) + metric = trial.suggest_categorical("model.init.metric", all_metrics.keys()) + if model_type == "knn": + k = trial.suggest_categorical("k", [3, 5, 7, 9, 11]) + weights = trial.suggest_categorical("weights", ["uniform", "distance"]) + algorithm = trial.suggest_categorical("algorithm", ["brute"]) + params = {"k": k, "weights": weights, "algorithm": algorithm} + elif model_type == "logistic": + C = trial.suggest_loguniform("C", 1e-10, 1e10) + solver = trial.suggest_categorical("solver", ["saga"]) + penalty = trial.suggest_categorical("penalty", ["l1", "l2", None]) + fit_intercept = trial.suggest_categorical("fit_intercept", [True, False]) + class_weight = trial.suggest_categorical("class_weight", ["balanced", None]) + params = { + "C": C, + "solver": solver, + "penalty": penalty, + "fit_intercept": fit_intercept, + "class_weight": class_weight, + } + elif model_type == "svc": + C = trial.suggest_loguniform("C", 1e-10, 1e10) + kernel = trial.suggest_categorical( + "kernel", + ["linear", "rbf", "poly", "sigmoid"], + ) + 
class_weight = trial.suggest_categorical("class_weight", ["balanced", None]) + if kernel == "poly": + degree = trial.suggest_int("degree", 2, 5) + params = { + "C": C, + "kernel": kernel, + "degree": degree, + "class_weight": class_weight, + } + elif kernel == "rbf": + gamma = trial.suggest_categorical("gamma", ["auto", "scale"]) + params = { + "C": C, + "kernel": kernel, + "gamma": gamma, + "class_weight": class_weight, + } + else: + params = {"C": C, "kernel": kernel, "class_weight": class_weight} + else: + raise NotImplementedError(f"Model type {model_type} not supported.") + params["metric"] = metric + params["model_name"] = f"{metric}_{model_type}" diff --git a/examples/gzip/params.yaml b/examples/gzip/params.yaml deleted file mode 100644 index 43dbcb17..00000000 --- a/examples/gzip/params.yaml +++ /dev/null @@ -1,88 +0,0 @@ -data: - _target_: deckard.base.data.Data - name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label -dataset: kdd_nsl -direction: -- maximize -files: - _target_: deckard.base.files.FileConfig - attack_dir: attacks - attack_file: attack - attack_type: .pkl - data_dir: data - data_file: data - data_type: .pkl - directory: output - model_dir: model - model_file: model - model_type: .pkl - name: default - params_file: params.yaml - predictions_file: predictions.json - reports: reports - score_dict_file: score_dict.json -model: - _target_: deckard.base.model.Model - art: - _target_: deckard.base.model.art_pipeline.ArtPipeline - initialize: - nb_classes: 3 - library: sklearn - data: - 
_target_: deckard.base.data.Data - name: https://gist.githubusercontent.com/simplymathematics/8c6c04bd151950d5ea9e62825db97fdd/raw/d6a22cdb42a1db624c89f0298cb4f654d3812703/kdd_nsl.csv - sample: - _target_: deckard.base.data.SklearnDataSampler - random_state: 0 - stratify: true - test_size: 100 - train_size: 100 - sklearn_pipeline: - encoder: - handle_unknown: use_encoded_value - name: sklearn.preprocessing.OrdinalEncoder - unknown_value: -1 - preprocessor: - name: sklearn.preprocessing.StandardScaler - with_mean: true - with_std: true - target: label - init: - _target_: deckard.base.model.ModelInitializer - compressor: gzip - distance_matrix: output/model/kdd_nsl/gzip_classifier/gzip/0-100.npz - k: 1 - m: -1 - method: random - name: gzip_classifier.GzipClassifier - library: sklearn -model_name: gzip_classifier -optimizers: -- accuracy -scorers: - _target_: deckard.base.scorer.ScorerDict - accuracy: - _target_: deckard.base.scorer.ScorerConfig - direction: maximize - name: sklearn.metrics.accuracy_score - log_loss: - _target_: deckard.base.scorer.ScorerConfig - direction: minimize - name: sklearn.metrics.log_loss -stage: train diff --git a/examples/pytorch/cifar10/.dvc/tmp/rwlock b/examples/pytorch/cifar10/.dvc/tmp/rwlock new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/examples/pytorch/cifar10/.dvc/tmp/rwlock @@ -0,0 +1 @@ +{} diff --git a/examples/security/classification/.gitignore b/examples/security/classification/.gitignore index 8a746d89..273db2f4 100644 --- a/examples/security/classification/.gitignore +++ b/examples/security/classification/.gitignore @@ -1,3 +1,4 @@ logs/ multirun/ output/ +/retrain diff --git a/examples/security/classification/dvc.lock b/examples/security/classification/dvc.lock index 01a4ce87..a0fe541c 100644 --- a/examples/security/classification/dvc.lock +++ b/examples/security/classification/dvc.lock @@ -329,8 +329,8 @@ stages: size: 950 - path: models.sh hash: md5 - md5: 45472713dfccf0cd62509e7d62e223fa - size: 5807 + 
md5: 509157bdd5b524a21b8294dc2409a969 + size: 5887 - path: output/reports/train/default/params.yaml hash: md5 md5: d4e0a34b2b15765ca71fa5ecaf7e3826 @@ -425,75 +425,77 @@ stages: outs: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 compile_models: cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file output/train.csv deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/reports/train/ hash: md5 - md5: fae483c6435daa9d29c947f2bce41511.dir - size: 512957700 - nfiles: 9852 + md5: 702efbf0ca05f21241fbfcbaeac9712b.dir + size: 52545076 + nfiles: 1548 outs: - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 find_best_model@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_rbf --study_name=rbf_100_10000 --default_config model.yaml + --params_file best_rbf --study_name=rbf_100_10000 --default_config default.yaml + --storage_name sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 
3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/model/best_rbf.yaml hash: md5 - md5: 0a90767d020934a3cd6d0c42a6f21606 - size: 357 + md5: 4932ceac75d6256ce2a7864aa4a5ea3c + size: 359 find_best_model@linear: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_linear --study_name=linear_100_10000 --default_config model.yaml + --params_file best_linear --study_name=linear_100_10000 --default_config default.yaml + --storage_name sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/model/best_linear.yaml hash: md5 @@ -501,25 +503,26 @@ stages: size: 332 find_best_model@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_poly --study_name=poly_100_10000 --default_config model.yaml + --params_file best_poly --study_name=poly_100_10000 --default_config default.yaml + --storage_name sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/model/best_poly.yaml hash: md5 - md5: a9d600cc46e9f49c3a0cca90f7c7d876 + md5: bd9e29f3e2e34263e48401a682a84a06 
size: 370 attacks: cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml @@ -530,34 +533,34 @@ stages: size: 332 - path: conf/model/best_poly.yaml hash: md5 - md5: a9d600cc46e9f49c3a0cca90f7c7d876 + md5: bd9e29f3e2e34263e48401a682a84a06 size: 370 - path: conf/model/best_rbf.yaml hash: md5 - md5: 0a90767d020934a3cd6d0c42a6f21606 - size: 357 + md5: 4932ceac75d6256ce2a7864aa4a5ea3c + size: 359 - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: attack.db hash: md5 - md5: 79ab050e04b70e212f1be85f09a974ef - size: 2334720 + md5: e4f26ccdc30870d9fea230d7e2f3d517 + size: 303104 - path: logs/attacks/ hash: md5 - md5: 4eabc469a5a951cd423da83bbd47c264.dir - size: 926809 + md5: 9d63507c9eccf50f94d1e8bcca1e9b9a.dir + size: 876433 nfiles: 3 compile_attacks: cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file @@ -565,89 +568,92 @@ stages: deps: - path: attack.db hash: md5 - md5: 79ab050e04b70e212f1be85f09a974ef - size: 2334720 + md5: e4f26ccdc30870d9fea230d7e2f3d517 + size: 303104 - path: logs/attacks/ hash: md5 - md5: 4eabc469a5a951cd423da83bbd47c264.dir - size: 926809 + md5: 9d63507c9eccf50f94d1e8bcca1e9b9a.dir + size: 876433 nfiles: 3 - path: output/reports/attack/ hash: md5 - md5: f610f016b9a97c37ff59de361311e5b1.dir - size: 7978562 - nfiles: 486 + md5: e8550da3b609d9d52ee496b0cbda8dcd.dir + size: 20185965 + nfiles: 1089 outs: - path: output/attack.csv hash: md5 - md5: f89e17affa7e38b4955ea3edc4661f9c - size: 188715 + md5: e83df99bc4ec73458235032d34d479a3 + size: 395210 find_best_attack@linear: cmd: python -m 
deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_linear --study_name=best_linear --default_config attack.yaml + --params_file best_linear --study_name=best_linear --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/attack/best_linear.yaml hash: md5 - md5: 4bb6215963ae7f0025f72ec31e26f29d - size: 244 + md5: b7ef4b4d709a4511ebd4f0a5e9002cdb + size: 248 find_best_attack@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_rbf --study_name=best_rbf --default_config attack.yaml + --params_file best_rbf --study_name=best_rbf --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/attack/best_rbf.yaml hash: md5 - md5: eca3091f7c0eb0b8958bc6becf43191d - size: 244 + md5: 74476a2360110c0c8c4e728857da2472 + size: 252 find_best_attack@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_poly 
--study_name=best_poly --default_config attack.yaml + --params_file best_poly --study_name=best_poly --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: logs/models/ hash: md5 - md5: d9c5585db1b343a23229a2fb5e77cbef.dir - size: 4828874 - nfiles: 60 + md5: fd9e6aad79d8a1be29d42da86fd11a98.dir + size: 1366301 + nfiles: 24 - path: model.db hash: md5 - md5: de6e467e793b2519ea5db993786e263e - size: 4870144 + md5: 676963d31977a42501b4243cb25ab935 + size: 593920 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 outs: - path: conf/attack/best_poly.yaml hash: md5 - md5: b5f8f874e44dbc8bdb0ababc67295174 - size: 246 + md5: 0e7533628e42f20dc5a34c35e2fb701a + size: 250 other_data_train@kdd_nsl: cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml deps: @@ -683,109 +689,110 @@ stages: deps: - path: conf/attack/best_linear.yaml hash: md5 - md5: 4bb6215963ae7f0025f72ec31e26f29d - size: 244 + md5: b7ef4b4d709a4511ebd4f0a5e9002cdb + size: 248 - path: conf/attack/best_poly.yaml hash: md5 - md5: b5f8f874e44dbc8bdb0ababc67295174 - size: 246 + md5: 0e7533628e42f20dc5a34c35e2fb701a + size: 250 - path: conf/attack/best_rbf.yaml hash: md5 - md5: eca3091f7c0eb0b8958bc6becf43191d - size: 244 + md5: 74476a2360110c0c8c4e728857da2472 + size: 252 - path: conf/model/best_linear.yaml hash: md5 md5: 23a7c49f5a8ddf63a7ac89fb61c0034d size: 332 - path: conf/model/best_poly.yaml hash: md5 - md5: a9d600cc46e9f49c3a0cca90f7c7d876 + md5: bd9e29f3e2e34263e48401a682a84a06 size: 370 - path: conf/model/best_rbf.yaml hash: md5 - md5: 0a90767d020934a3cd6d0c42a6f21606 - size: 357 + md5: 4932ceac75d6256ce2a7864aa4a5ea3c + size: 359 - path: output/attacks/ hash: md5 - md5: 2706070162d082792d7b52629d691d15.dir - size: 2410072 - nfiles: 61 - - path: output/models/ - hash: md5 - md5: c7222ada919037fb45b73e4f6c1f88a2.dir - size: 
70825596 - nfiles: 1244 + md5: 658e0a848877fbafbddd62ec5dd22dc3.dir + size: 4819192 + nfiles: 121 outs: - path: plots/after_retrain_confidence.csv hash: md5 - md5: 8838aabe00dcca60ae5c5681174bfc7f - size: 18011 + md5: c2273c7a9d789de1939d5006a7a087eb + size: 326367 - path: plots/before_retrain_confidence.csv hash: md5 - md5: edc0f782bfd97743823318d6b14d5d14 - size: 17994 + md5: 1a52061abda8e60e503ea271439b8f8a + size: 326350 - path: retrain/ hash: md5 - md5: 062d1374edb8e366a1c65308fa4fdfbc.dir - size: 176883 + md5: 22c8403d05f0f866398b504f6f3c4d37.dir + size: 173285 nfiles: 12 plots: cmd: python plots.py deps: - path: output/attack.csv hash: md5 - md5: f89e17affa7e38b4955ea3edc4661f9c - size: 188715 + md5: e83df99bc4ec73458235032d34d479a3 + size: 395210 - path: output/train.csv hash: md5 - md5: a048280df159bb5ee1ce118d0d3cfd14 - size: 3559023 + md5: f0e4e7434085d033c5038fb1723acc25 + size: 610341 + - path: plots.py + hash: md5 + md5: d7b45f7ef670728e8a238909265334f2 + size: 12114 - path: plots/after_retrain_confidence.csv hash: md5 - md5: 8838aabe00dcca60ae5c5681174bfc7f - size: 18011 + md5: c2273c7a9d789de1939d5006a7a087eb + size: 326367 - path: plots/before_retrain_confidence.csv hash: md5 - md5: edc0f782bfd97743823318d6b14d5d14 - size: 17994 + md5: 1a52061abda8e60e503ea271439b8f8a + size: 326350 outs: - path: plots/accuracy_vs_attack_parameters.eps hash: md5 - md5: 62ba219171d53a6d7bee9adaaa5dcae2 - size: 41249 + md5: 13be25e57708a0b2e7c6d062ad310b97 + size: 38999 - path: plots/accuracy_vs_features.eps hash: md5 - md5: 45d51ca30fc0e46849609941fc4cbb53 - size: 21450 + md5: 3cf6dc9eb9913ab3babc82002abc5ad4 + size: 21548 - path: plots/accuracy_vs_samples.eps hash: md5 - md5: c7bba36d352106cdeee655e01870bdcf - size: 23719 + md5: be2def33826b2131795cf599a87f12de + size: 25049 - path: plots/confidence_vs_attack_parameters.eps hash: md5 - md5: c2887dfae9cdfbb24d9d15d3655c3c87 - size: 40822 + md5: 24d6d00ad927000bc60ab2012f56520c + size: 41436 - path: 
plots/retrain_accuracy.eps hash: md5 - md5: 25d6d1ec08dc127bcd04470ca476d146 - size: 23419 + md5: 2b62b83a5b7a37c16d25319602e102f4 + size: 30833 - path: plots/retrain_confidence_vs_attack_parameters.eps hash: md5 - md5: 5a6969fefe91e5c675600e07d8bff580 - size: 40819 + md5: 860ffadab6254488091c8bc1c619f56c + size: 41628 - path: plots/retrain_time.eps hash: md5 - md5: 2d28bfca3ebb7ef3b7b4fbfb69eb045f - size: 20957 + md5: e32d6c3cc459943ea418eea1e20fdc2f + size: 28407 - path: plots/train_time_vs_attack_parameters.eps hash: md5 - md5: f56d1fc7846df9a1276749a9bd5675e9 - size: 38521 + md5: 5e88339288029b1f53f7f02d6a88bafe + size: 39252 - path: plots/train_time_vs_features.eps hash: md5 - md5: a3300cdd85533e51ce108c4f141376f6 - size: 20644 + md5: 2bf86c698e490164eb5fe4f76743f21b + size: 19529 - path: plots/train_time_vs_samples.eps hash: md5 - md5: 15f3f109c2f09c01edc6bc0e68786ce6 - size: 24036 + md5: 99b6bb26684bccd5092e92e095f2b484 + size: 24348 + move_files: + cmd: 'cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/generated/ ' diff --git a/examples/security/classification/dvc.yaml b/examples/security/classification/dvc.yaml index e44f6357..4ee7d639 100644 --- a/examples/security/classification/dvc.yaml +++ b/examples/security/classification/dvc.yaml @@ -74,7 +74,7 @@ stages: - rbf - poly do: - cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item}_100_10000 --default_config model.yaml + cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item}_100_10000 --default_config default.yaml --storage_name sqlite:///model.db outs: - conf/model/best_${item}.yaml deps: @@ -112,7 +112,7 @@ stages: - rbf - poly do: - cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml + cmd: python -m deckard.layers.find_best --config_folder conf 
--config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize outs: - conf/attack/best_${item}.yaml deps: @@ -122,7 +122,6 @@ stages: retrain: cmd : python retrain.py deps: - - ${files.directory}/models/ - ${files.directory}/attacks/ - conf/attack/best_linear.yaml - conf/attack/best_rbf.yaml @@ -142,6 +141,7 @@ stages: - output/train.csv - plots/before_retrain_confidence.csv - output/attack.csv + - plots.py plots : - plots/accuracy_vs_attack_parameters.eps - plots/accuracy_vs_features.eps @@ -153,3 +153,7 @@ stages: - plots/retrain_accuracy.eps - plots/retrain_confidence_vs_attack_parameters.eps - plots/retrain_time.eps + move_files: + cmd: >- + cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/generated/ + #&& rm ~/KDD-Paper-EAI-AISEC/generated/.gitignore diff --git a/examples/security/classification/plots.py b/examples/security/classification/plots.py index 3e515da7..b815a223 100644 --- a/examples/security/classification/plots.py +++ b/examples/security/classification/plots.py @@ -18,12 +18,9 @@ # else: # results = parse_results("reports/model_queue/") results = pd.read_csv("output/train.csv") -input_size = ( - results["data.generate.kwargs.n_samples"] - * results["data.generate.kwargs.n_features"] -) -results["Kernel"] = results["model.init.kwargs.kernel"].copy() -results["Features"] = results["data.generate.kwargs.n_features"].copy() +input_size = results["data.generate.n_samples"] * results["data.generate.n_features"] +results["Kernel"] = results["model.init.kernel"].copy() +results["Features"] = results["data.generate.n_features"].copy() results["Samples"] = results["data.sample.train_size"].copy() results["input_size"] = input_size if "Unnamed: 0" in results.columns: @@ -31,11 +28,11 @@ for col in results.columns: if col == "data.name" and isinstance(results[col][0], list): results[col] = results[col].apply(lambda x: x[0]) -results = 
results[results["model.init.kwargs.kernel"] != "sigmoid"] +results = results[results["model.init.kernel"] != "sigmoid"] attack_results = pd.read_csv("output/attack.csv") -attack_results["Kernel"] = attack_results["model.init.kwargs.kernel"].copy() -attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy() +attack_results["Kernel"] = attack_results["model.init.kernel"].copy() +attack_results["Features"] = attack_results["data.generate.n_features"].copy() attack_results["Samples"] = attack_results["data.sample.train_size"].copy() if "Unnamed: 0" in attack_results.columns: del attack_results["Unnamed: 0"] @@ -50,6 +47,8 @@ data=results, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph1.legend(labels=["Linear", "RBF", "Poly"]) graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") @@ -62,11 +61,13 @@ plt.gcf().clear() graph2 = sns.lineplot( - x="data.generate.kwargs.n_features", + x="data.generate.n_features", y="accuracy", data=results, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph2.set_xlabel("Number of Features") graph2.set_ylabel("Accuracy") @@ -78,11 +79,13 @@ graph3 = sns.lineplot( - x="data.generate.kwargs.n_features", + x="data.generate.n_features", y="train_time", data=results, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph3.set_xlabel("Number of Features") graph3.set_ylabel("Training Time") @@ -98,6 +101,8 @@ data=results, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph4.set_xlabel("Number of Samples") graph4.set_ylabel("Training Time") @@ -109,7 +114,7 @@ fig, ax = plt.subplots(2, 2) graph5 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="accuracy", data=attack_results, style="Kernel", @@ -117,20 +122,24 @@ legend=False, color="darkred", 
style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy") graph6 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="accuracy", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy") graph7 = sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", y="accuracy", data=attack_results, style="Kernel", @@ -138,10 +147,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy") graph8 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", y="accuracy", data=attack_results, style="Kernel", @@ -149,6 +160,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy") graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") @@ -158,7 +171,7 @@ fig, ax = plt.subplots(2, 2) graph9 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="adv_fit_time", data=attack_results, style="Kernel", @@ -166,20 +179,24 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time") graph10 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="adv_fit_time", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time") graph11 = 
sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", y="adv_fit_time", data=attack_results, style="Kernel", @@ -187,10 +204,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time") graph12 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", y="adv_fit_time", data=attack_results, style="Kernel", @@ -198,6 +217,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") @@ -225,6 +246,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -234,6 +257,8 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") @@ -250,6 +275,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -259,6 +286,8 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") @@ -279,6 +308,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -289,6 +320,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + 
errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -300,6 +333,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -311,6 +346,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") @@ -330,6 +367,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -340,6 +379,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -351,6 +392,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -362,6 +405,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") diff --git a/examples/security/classification/plots/.gitignore b/examples/security/classification/plots/.gitignore new file mode 100644 index 00000000..4c882c2e --- /dev/null +++ b/examples/security/classification/plots/.gitignore @@ -0,0 +1,10 @@ +/accuracy_vs_attack_parameters.eps +/accuracy_vs_features.eps 
+/accuracy_vs_samples.eps +/confidence_vs_attack_parameters.eps +/train_time_vs_attack_parameters.eps +/train_time_vs_features.eps +/train_time_vs_samples.eps +/retrain_accuracy.eps +/retrain_confidence_vs_attack_parameters.eps +/retrain_time.eps diff --git a/examples/security/classification/retrain.py b/examples/security/classification/retrain.py index 9623e19d..8ae973e0 100644 --- a/examples/security/classification/retrain.py +++ b/examples/security/classification/retrain.py @@ -344,7 +344,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: params = json.load(f) else: raise ValueError(f"No params file found for {folder}") - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name confidence_ser["Average False Confidence"] = avg_prob @@ -432,7 +432,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: else: logger.warning(f"No params file found for {folder}") continue - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name confidence_ser["Average False Confidence After Retraining"] = avg_prob diff --git a/examples/security/kdd-nsl/.gitignore b/examples/security/kdd-nsl/.gitignore index 8a746d89..273db2f4 100644 --- a/examples/security/kdd-nsl/.gitignore +++ b/examples/security/kdd-nsl/.gitignore @@ -1,3 +1,4 @@ logs/ multirun/ output/ +/retrain diff --git a/examples/security/kdd-nsl/attacks.sh b/examples/security/kdd-nsl/attacks.sh index 76ed02bc..8b53b739 100644 --- a/examples/security/kdd-nsl/attacks.sh +++ b/examples/security/kdd-nsl/attacks.sh @@ -11,7 +11,7 @@ for model_config in $CONFIG_NAMES; do continue fi HYDRA_FULL_ERROR=1 python -m deckard.layers.optimise \ - ++model.init.kernel=kernel_name \ + ++model.init.kernel=${kernel_name}\ 
++stage=attack \ ++attack.init.name=art.attacks.evasion.ProjectedGradientDescent \ ++attack.init.norm=1,2,inf \ @@ -21,6 +21,7 @@ for model_config in $CONFIG_NAMES; do ++attack.init.max_iter=1,10,100,1000 \ ++hydra.sweeper.study_name=$model_config \ ++attack.attack_size=100 \ + direction=minimize \ model=$model_config $@ --multirun >> logs/attacks/$model_config.log echo "Successfully completed model $model_config" >> attack_log.txt done diff --git a/examples/security/kdd-nsl/dvc.lock b/examples/security/kdd-nsl/dvc.lock index 9497e7e0..c2fecd0f 100644 --- a/examples/security/kdd-nsl/dvc.lock +++ b/examples/security/kdd-nsl/dvc.lock @@ -94,39 +94,39 @@ stages: outs: - path: output/reports/train/default/params.yaml hash: md5 - md5: 7234aab7d5edae504afa2090d96e4c3f - size: 2434 + md5: 6225c0aefe4059bfae7f5b0e04ae549a + size: 2189 - path: output/reports/train/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/score_dict.json hash: md5 - md5: 8869350701c8b1b367cdb1a33ab572d9 - size: 360 + md5: cc368afafd0e89f04fb0ae89e64f5e0d + size: 716 attack: cmd: python -m deckard.layers.experiment attack deps: - path: output/reports/train/default/params.yaml hash: md5 - md5: 7234aab7d5edae504afa2090d96e4c3f - size: 2434 + md5: 6225c0aefe4059bfae7f5b0e04ae549a + size: 2189 - path: output/reports/train/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/score_dict.json hash: md5 - md5: 
8869350701c8b1b367cdb1a33ab572d9 - size: 360 + md5: cc368afafd0e89f04fb0ae89e64f5e0d + size: 716 params: params.yaml: attack: @@ -315,32 +315,32 @@ stages: outs: - path: output/attacks/attack.pkl hash: md5 - md5: b240c5f9c659967fe4768b5929a84905 + md5: e250ed2062f12ee9f024bf1be33abf73 size: 1832 - path: output/reports/attack/default/adv_predictions.json hash: md5 - md5: 36e7fcc5fe32df3a68a2603317e3d328 - size: 438 + md5: 8cb93c0ec6db31d94298f831ac081c64 + size: 700 - path: output/reports/attack/default/adv_probabilities.json hash: md5 - md5: 36e7fcc5fe32df3a68a2603317e3d328 - size: 438 + md5: 8cb93c0ec6db31d94298f831ac081c64 + size: 700 - path: output/reports/attack/default/params.yaml hash: md5 - md5: b300c684dc58fc23684ccefbb9f83265 - size: 5832 + md5: 3aa13a2e1e66b911f66d9bd8a8823369 + size: 5310 - path: output/reports/attack/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/attack/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/attack/default/score_dict.json hash: md5 - md5: f8b8b80b2e8369f09e1f4730fcd9ba57 - size: 582 + md5: 595fabb17f79dca7ef3d7799e6a43388 + size: 1235 models: cmd: bash other_data.sh +stage=train --config-name=model.yaml deps: @@ -448,75 +448,77 @@ stages: outs: - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 compile_models: cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file output/train.csv deps: - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir 
+ size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 - path: output/reports/train/ hash: md5 - md5: 4bbc6640609fdcd2e3d8595678dc22c8.dir - size: 42445285 - nfiles: 1672 + md5: df8221c356532e382e7f6909027e1648.dir + size: 11786125 + nfiles: 336 outs: - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 find_best_model@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_rbf --study_name=rbf --default_config model.yaml + --params_file best_rbf --study_name=rbf --default_config default.yaml --storage_name + sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/model/best_rbf.yaml hash: md5 - md5: 3092c0288833989d2e77d849993a2a40 - size: 360 + md5: 7210f1655e71b637d09822e3faa1f0ff + size: 358 find_best_model@linear: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_linear --study_name=linear --default_config model.yaml + --params_file best_linear --study_name=linear --default_config default.yaml + --storage_name sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 - path: 
output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/model/best_linear.yaml hash: md5 @@ -524,26 +526,27 @@ stages: size: 330 find_best_model@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_poly --study_name=poly --default_config model.yaml + --params_file best_poly --study_name=poly --default_config default.yaml --storage_name + sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 49c26d851f36ef980b4a5bb1dabfebd8 + size: 370 attacks: cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml deps: @@ -553,34 +556,34 @@ stages: size: 330 - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 49c26d851f36ef980b4a5bb1dabfebd8 + size: 370 - path: conf/model/best_rbf.yaml hash: md5 - md5: 3092c0288833989d2e77d849993a2a40 - size: 360 + md5: 7210f1655e71b637d09822e3faa1f0ff + size: 358 - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: model.db hash: md5 - md5: 081a4f2934142058dbe5674f8d087031 - size: 733184 + md5: 155463edba880de94ed717294def04a8 + size: 208896 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: attack.db hash: 
md5 - md5: 380effd61d22da8bc2b0f655e67f1cf0 - size: 700416 + md5: 37f5c17e7689935a334caf09c8aac40c + size: 315392 - path: logs/attacks/ hash: md5 - md5: e3d5880a8a34d62926f202472f635636.dir - size: 7098648 + md5: 18f2cba5502fa20600145eb551f2e64b.dir + size: 1695110 nfiles: 3 compile_attacks: cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file @@ -588,89 +591,92 @@ stages: deps: - path: attack.db hash: md5 - md5: 380effd61d22da8bc2b0f655e67f1cf0 - size: 700416 + md5: 37f5c17e7689935a334caf09c8aac40c + size: 315392 - path: logs/attacks/ hash: md5 - md5: e3d5880a8a34d62926f202472f635636.dir - size: 7098648 + md5: 18f2cba5502fa20600145eb551f2e64b.dir + size: 1695110 nfiles: 3 - path: output/reports/attack/ hash: md5 - md5: 9a8c30a61ea2025b38ad09a7bd1a8e82.dir - size: 64940922 - nfiles: 4355 + md5: b71df3c8f2374573d6170f3223aa9b9c.dir + size: 39783146 + nfiles: 2169 outs: - path: output/attack.csv hash: md5 - md5: b0d1e2263515e400f6303c3afb0f5cfd - size: 1545938 + md5: 3ba52610fa5c0f042ceb92c3139f5596 + size: 983830 find_best_attack@linear: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_linear --study_name=best_linear --default_config attack.yaml + --params_file best_linear --study_name=best_linear --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 380effd61d22da8bc2b0f655e67f1cf0 - size: 700416 + md5: 37f5c17e7689935a334caf09c8aac40c + size: 315392 - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/attack/best_linear.yaml hash: md5 - md5: f048059aaa0e383f9c5ae9c085927588 - size: 231 + md5: d154a851ce6ec4fd55b11dbc50bea318 + size: 
249 find_best_attack@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_rbf --study_name=best_rbf --default_config attack.yaml + --params_file best_rbf --study_name=best_rbf --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 380effd61d22da8bc2b0f655e67f1cf0 - size: 700416 + md5: 37f5c17e7689935a334caf09c8aac40c + size: 315392 - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/attack/best_rbf.yaml hash: md5 - md5: 936f60710cd2fba6d1b3584accc94943 - size: 246 + md5: c68a838c04899ee68e0072f640af2f21 + size: 248 find_best_attack@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_poly --study_name=best_poly --default_config attack.yaml + --params_file best_poly --study_name=best_poly --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 380effd61d22da8bc2b0f655e67f1cf0 - size: 700416 + md5: 37f5c17e7689935a334caf09c8aac40c + size: 315392 - path: logs/models/ hash: md5 - md5: ab01d57634e90f21b3b9a25ff62da3ca.dir - size: 359561 + md5: 3bdfd76f9298422ef6c1b55ef111802c.dir + size: 202845 nfiles: 3 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 outs: - path: conf/attack/best_poly.yaml hash: md5 - md5: 26b55aad33b06e46b07904b00c5cb236 - size: 228 + md5: 33974287420fdf63175bb6e0212a1e9b + size: 251 other_data_train@kdd_nsl: cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml deps: @@ -706,93 +712,94 @@ 
stages: deps: - path: conf/attack/best_linear.yaml hash: md5 - md5: f048059aaa0e383f9c5ae9c085927588 - size: 231 + md5: d154a851ce6ec4fd55b11dbc50bea318 + size: 249 - path: conf/attack/best_poly.yaml hash: md5 - md5: 26b55aad33b06e46b07904b00c5cb236 - size: 228 + md5: 33974287420fdf63175bb6e0212a1e9b + size: 251 - path: conf/attack/best_rbf.yaml hash: md5 - md5: 936f60710cd2fba6d1b3584accc94943 - size: 246 + md5: c68a838c04899ee68e0072f640af2f21 + size: 248 - path: conf/model/best_linear.yaml hash: md5 md5: e4ae7059114d8724d4947e952145d4fe size: 330 - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 49c26d851f36ef980b4a5bb1dabfebd8 + size: 370 - path: conf/model/best_rbf.yaml hash: md5 - md5: 3092c0288833989d2e77d849993a2a40 - size: 360 + md5: 7210f1655e71b637d09822e3faa1f0ff + size: 358 - path: output/attacks/ hash: md5 - md5: 4551130dd81dfa20db94f2888d04675c.dir - size: 725472 - nfiles: 396 - - path: output/models/ - hash: md5 - md5: a738ec4b74e79472cfce860968cba882.dir - size: 2390233 - nfiles: 279 + md5: fa1bb6df926ae12f22c2651ab77c3a86.dir + size: 4070312 + nfiles: 241 outs: - path: plots/after_retrain_confidence.csv hash: md5 - md5: ce54cebd30fd5088597f7db85eab1754 - size: 114012 + md5: d06f8ccd3410c566773776bee2933753 + size: 785930 - path: plots/before_retrain_confidence.csv hash: md5 - md5: 82ff291d66e8f067a223cfcf1f117f63 - size: 113995 + md5: 7289fa5bcd5712d52801b76b36159d80 + size: 785913 - path: retrain/ hash: md5 - md5: 5f501f7245ed485c6d1d0e5ac44297a3.dir - size: 174463 + md5: 9f340584668054abbc4cda10df68f660.dir + size: 172962 nfiles: 12 plots: cmd: python plots.py deps: - path: output/attack.csv hash: md5 - md5: b0d1e2263515e400f6303c3afb0f5cfd - size: 1545938 + md5: 3ba52610fa5c0f042ceb92c3139f5596 + size: 983830 - path: output/train.csv hash: md5 - md5: c740b7ccc67c3f38a04446ad0afe5ce6 - size: 611967 + md5: 4508b28e78d9b4d38dd60a10b54798dc + size: 164189 + - path: plots.py + hash: md5 + md5: 
6f0729bdca6bafc3c92faca71dc8c97e + size: 10164 - path: plots/after_retrain_confidence.csv hash: md5 - md5: ce54cebd30fd5088597f7db85eab1754 - size: 114012 + md5: d06f8ccd3410c566773776bee2933753 + size: 785930 - path: plots/before_retrain_confidence.csv hash: md5 - md5: 82ff291d66e8f067a223cfcf1f117f63 - size: 113995 + md5: 7289fa5bcd5712d52801b76b36159d80 + size: 785913 outs: - - path: plots/accuracy_vs_attack_parameters.pdf + - path: plots/accuracy_vs_attack_parameters.eps hash: md5 - md5: 8adf0a397611373445d6d4537acd494d - size: 16715 - - path: plots/confidence_vs_attack_parameters.pdf + md5: 8174380cd1e3153249aa7f4095905d82 + size: 39189 + - path: plots/confidence_vs_attack_parameters.eps hash: md5 - md5: de3ef58684597cc5e71a4f6062128fe7 - size: 18202 - - path: plots/retrain_accuracy.pdf + md5: e612551ce45bfb4fbd134c0058ae038d + size: 41785 + - path: plots/retrain_accuracy.eps hash: md5 - md5: 577e89d46eb6f2446d0a3ed83b4f9e19 - size: 13913 - - path: plots/retrain_confidence_vs_attack_parameters.pdf + md5: 5d0161b9c44e397e167e200738709fe3 + size: 30829 + - path: plots/retrain_confidence_vs_attack_parameters.eps hash: md5 - md5: 4f7b2f8e2a7a4552816389bd1dcaa074 - size: 18181 - - path: plots/retrain_time.pdf + md5: 76c457aeabd26983a5fc3a129e942c0a + size: 42149 + - path: plots/retrain_time.eps hash: md5 - md5: 7ad5725d3c3033b796ece976881d852d - size: 12896 - - path: plots/train_time_vs_attack_parameters.pdf + md5: 461075c4b7f2f693c22f96e34db026ca + size: 28368 + - path: plots/train_time_vs_attack_parameters.eps hash: md5 - md5: c2436157654bd664dc06528fcbfc834a - size: 17032 + md5: 59de7016df4a8380776a7ea0dd160359 + size: 39247 + move_files: + cmd: cp -r plots/* ~/KDD-Paper-EAI-AISEC/kdd-nsl/ diff --git a/examples/security/kdd-nsl/dvc.yaml b/examples/security/kdd-nsl/dvc.yaml index 04164939..b3ea885c 100644 --- a/examples/security/kdd-nsl/dvc.yaml +++ b/examples/security/kdd-nsl/dvc.yaml @@ -70,7 +70,7 @@ stages: - rbf - poly do: - cmd: python -m 
deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config model.yaml + cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config default.yaml --storage_name sqlite:///model.db outs: - conf/model/best_${item}.yaml deps: @@ -108,7 +108,7 @@ stages: - rbf - poly do: - cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml + cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize outs: - conf/attack/best_${item}.yaml deps: @@ -118,7 +118,6 @@ stages: retrain: cmd : python retrain.py deps: - - ${files.directory}/models/ - ${files.directory}/attacks/ - conf/attack/best_linear.yaml - conf/attack/best_rbf.yaml @@ -134,18 +133,22 @@ stages: plots: cmd : python plots.py deps : + - plots.py - plots/after_retrain_confidence.csv - output/attack.csv - plots/before_retrain_confidence.csv - output/train.csv plots : - - plots/accuracy_vs_attack_parameters.pdf - # - plots/accuracy_vs_features.pdf - # - plots/accuracy_vs_samples.pdf - - plots/confidence_vs_attack_parameters.pdf - - plots/train_time_vs_attack_parameters.pdf - # - plots/train_time_vs_features.pdf - # - plots/train_time_vs_samples.pdf - - plots/retrain_accuracy.pdf - - plots/retrain_confidence_vs_attack_parameters.pdf - - plots/retrain_time.pdf + - plots/accuracy_vs_attack_parameters.eps + # - plots/accuracy_vs_features.eps + # - plots/accuracy_vs_samples.eps + - plots/confidence_vs_attack_parameters.eps + - plots/train_time_vs_attack_parameters.eps + # - plots/train_time_vs_features.eps + # - plots/train_time_vs_samples.eps + - plots/retrain_accuracy.eps + - 
plots/retrain_confidence_vs_attack_parameters.eps + - plots/retrain_time.eps + move_files: + cmd: >- + cp -r plots/* ~/KDD-Paper-EAI-AISEC/kdd-nsl/ diff --git a/examples/security/kdd-nsl/plots.py b/examples/security/kdd-nsl/plots.py index 06375d98..b5499185 100644 --- a/examples/security/kdd-nsl/plots.py +++ b/examples/security/kdd-nsl/plots.py @@ -18,28 +18,16 @@ # else: # results = parse_results("reports/model_queue/") results = pd.read_csv("output/train.csv") -# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"] -results["Kernel"] = results["model.init.kwargs.kernel"].copy() -# results["Features"] = results["data.generate.kwargs.n_features"].copy() -results["Samples"] = results["data.sample.train_size"].copy() -# results["input_size"] = input_size -# sample_list = results["data.generate.kwargs.n_samples"].unique() -# feature_list = results["data.generate.kwargs.n_features"].unique() -kernel_list = results["model.init.kwargs.kernel"].unique() +results["Kernel"] = results["model.init.kernel"].copy() if "Unnamed: 0" in results.columns: del results["Unnamed: 0"] for col in results.columns: if col == "data.name" and isinstance(results[col][0], list): results[col] = results[col].apply(lambda x: x[0]) -results = results[results["model.init.kwargs.kernel"] != "sigmoid"] +results = results[results["model.init.kernel"] != "sigmoid"] attack_results = pd.read_csv("output/attack.csv") -attack_results["Kernel"] = attack_results["model.init.kwargs.kernel"].copy() -# attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy() -# attack_results["Samples"] = attack_results["data.sample.train_size"].copy() -# sample_list = attack_results["data.generate.kwargs.n_samples"].unique() -# feature_list = attack_results["data.generate.kwargs.n_features"].unique() -kernel_list = attack_results["model.init.kwargs.kernel"].unique() +attack_results["Kernel"] = attack_results["model.init.kernel"].copy() if "Unnamed: 0" 
in attack_results.columns: del attack_results["Unnamed: 0"] for col in attack_results.columns: @@ -47,75 +35,26 @@ attack_results[col] = attack_results[col].apply(lambda x: x[0]) -# graph1 = sns.lineplot( -# x="data.sample.train_size", -# y="accuracy", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph1.legend(labels=["Linear", "RBF", "Poly"]) -# graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") -# graph1.set_xlabel("Number of Samples") -# graph1.set_ylabel("Accuracy") -# graph1.set_xscale("log") -# graph1.get_figure().tight_layout() -# graph1.get_figure().savefig("plots/accuracy_vs_samples.pdf") -# plt.gcf().clear() - -# graph2 = sns.lineplot( -# x="data.generate.kwargs.n_features", -# y="accuracy", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph2.set_xlabel("Number of Features") -# graph2.set_ylabel("Accuracy") -# graph2.set_xscale("log") -# graph2.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") -# graph2.get_figure().tight_layout() -# graph2.get_figure().savefig("plots/accuracy_vs_features.pdf") -# plt.gcf().clear() - -# results["train_time"] = ( -# results["train_time"] -# * results["data.sample.train_size"] -# * results["data.generate.kwargs.n_samples"] -# ) -# graph3 = sns.lineplot( -# x="data.generate.kwargs.n_features", -# y="train_time", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph3.set_xlabel("Number of Features") -# graph3.set_ylabel("Training Time") -# graph3.set(yscale="log", xscale="log") -# graph3.legend(title="Kernel") -# graph3.get_figure().tight_layout() -# graph3.get_figure().savefig("plots/train_time_vs_features.pdf") -# plt.gcf().clear() - -# graph4 = sns.lineplot( -# x="data.sample.train_size", -# y="train_time", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph4.set_xlabel("Number of Samples") -# 
graph4.set_ylabel("Training Time") -# graph4.set(yscale="log", xscale="log") -# graph4.legend(title="Kernel") -# graph4.get_figure().tight_layout() -# graph4.get_figure().savefig("plots/train_time_vs_samples.pdf") -# plt.gcf().clear() +graph4 = sns.lineplot( + x="data.sample.train_size", + y="train_time", + data=results, + style="Kernel", + style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), +) +graph4.set_xlabel("Number of Samples") +graph4.set_ylabel("Training Time") +graph4.set(yscale="log", xscale="log", xlim=(10, 1e6)) +graph4.legend(title="Kernel") +graph4.get_figure().tight_layout() +graph4.get_figure().savefig("plots/train_time_vs_samples.eps") +plt.gcf().clear() fig, ax = plt.subplots(2, 2) graph5 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="accuracy", data=attack_results, style="Kernel", @@ -123,20 +62,24 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy") graph6 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="accuracy", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy") graph7 = sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", y="accuracy", data=attack_results, style="Kernel", @@ -144,10 +87,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy") graph8 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", y="accuracy", data=attack_results, style="Kernel", @@ -155,16 +100,18 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", 
+ errorbar=("ci", 99), ) graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy") graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout() -fig.savefig("plots/accuracy_vs_attack_parameters.pdf") +fig.savefig("plots/accuracy_vs_attack_parameters.eps") plt.gcf().clear() fig, ax = plt.subplots(2, 2) graph9 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="adv_fit_time", data=attack_results, style="Kernel", @@ -172,20 +119,24 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time") graph10 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="adv_fit_time", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time") graph11 = sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", y="adv_fit_time", data=attack_results, style="Kernel", @@ -193,10 +144,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time") graph12 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", y="adv_fit_time", data=attack_results, style="Kernel", @@ -204,11 +157,13 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/train_time_vs_attack_parameters.pdf") +fig.savefig("plots/train_time_vs_attack_parameters.eps") plt.gcf().clear() retrain_df = 
pd.DataFrame() @@ -231,6 +186,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -240,12 +197,14 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") retrain.set_ylabel("Accuracy") retrain.get_figure().tight_layout() -retrain.get_figure().savefig("plots/retrain_accuracy.pdf") +retrain.get_figure().savefig("plots/retrain_accuracy.eps") plt.gcf().clear() retrain_df["ben_time"] = retrain_df["ben_time"] * retrain_df["train_size"] * 10 @@ -256,6 +215,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -265,13 +226,15 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") retrain.set_ylabel("Time") retrain.set_yscale("log") retrain.get_figure().tight_layout() -retrain.get_figure().savefig("plots/retrain_time.pdf") +retrain.get_figure().savefig("plots/retrain_time.eps") plt.gcf().clear() confidence_df = pd.read_csv("plots/before_retrain_confidence.csv") @@ -285,6 +248,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -295,6 +260,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -306,6 +273,8 @@ legend=False, color="darkred", 
style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -317,11 +286,13 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/confidence_vs_attack_parameters.pdf") +fig.savefig("plots/confidence_vs_attack_parameters.eps") plt.gcf().clear() confdence_df = pd.read_csv("plots/after_retrain_confidence.csv") @@ -336,6 +307,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -346,6 +319,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -357,6 +332,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -368,9 +345,11 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/retrain_confidence_vs_attack_parameters.pdf") +fig.savefig("plots/retrain_confidence_vs_attack_parameters.eps") plt.gcf().clear() diff --git a/examples/security/kdd-nsl/plots/.gitignore 
b/examples/security/kdd-nsl/plots/.gitignore index 642f14d4..f09089fa 100644 --- a/examples/security/kdd-nsl/plots/.gitignore +++ b/examples/security/kdd-nsl/plots/.gitignore @@ -4,9 +4,3 @@ /retrain_accuracy.eps /retrain_confidence_vs_attack_parameters.eps /retrain_time.eps -/accuracy_vs_attack_parameters.pdf -/confidence_vs_attack_parameters.pdf -/train_time_vs_attack_parameters.pdf -/retrain_accuracy.pdf -/retrain_confidence_vs_attack_parameters.pdf -/retrain_time.pdf diff --git a/examples/security/kdd-nsl/plots/train_time_vs_samples.eps b/examples/security/kdd-nsl/plots/train_time_vs_samples.eps new file mode 100644 index 00000000..8646b377 --- /dev/null +++ b/examples/security/kdd-nsl/plots/train_time_vs_samples.eps @@ -0,0 +1,1373 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Title: train_time_vs_samples.eps +%%Creator: Matplotlib v3.7.2, https://matplotlib.org/ +%%CreationDate: Tue Jul 16 16:20:51 2024 +%%Orientation: portrait +%%BoundingBox: 75 223 537 569 +%%HiResBoundingBox: 75.600000 223.200000 536.400000 568.800000 +%%EndComments +%%BeginProlog +/mpldict 11 dict def +mpldict begin +/_d { bind def } bind def +/m { moveto } _d +/l { lineto } _d +/r { rlineto } _d +/c { curveto } _d +/cl { closepath } _d +/ce { closepath eofill } _d +/box { + m + 1 index 0 r + 0 exch r + neg 0 r + cl + } _d +/clipbox { + box + clip + newpath + } _d +/sc { setcachedevice } _d +%!PS-Adobe-3.0 Resource-Font +%%Creator: Converted from TrueType to Type 3 by Matplotlib. 
+10 dict begin +/FontName /DejaVuSerif def +/PaintType 0 def +/FontMatrix [0.00048828125 0 0 0.00048828125 0 0] def +/FontBBox [-1576 -710 4312 2272] def +/FontType 3 def +/Encoding [/minus /space /period /zero /one /two /three /four /five /six /eight /K /N /S /T /multiply /a /b /e /f /g /i /l /m /n /o /p /r /s /u /y] def +/CharStrings 32 dict dup begin +/.notdef 0 def +/minus{1716 0 217 561 1499 723 sc +217 723 m +1499 723 l +1499 561 l +217 561 l +217 723 l + +ce} _d +/space{651 0 0 0 0 0 sc +ce} _d +/period{651 0 193 -29 459 238 sc +193 104 m +193 141 206 173 231 199 c +256 225 288 238 326 238 c +363 238 394 225 420 199 c +446 173 459 141 459 104 c +459 67 446 36 420 10 c +394 -16 363 -29 326 -29 c +288 -29 256 -16 231 9 c +206 35 193 67 193 104 c + +ce} _d +/zero{1303 0 135 -29 1167 1520 sc +651 70 m +753 70 829 126 880 238 c +931 350 956 519 956 745 c +956 972 931 1141 880 1253 c +829 1365 753 1421 651 1421 c +549 1421 473 1365 422 1253 c +371 1141 346 972 346 745 c +346 519 371 350 422 238 c +473 126 549 70 651 70 c + +651 -29 m +489 -29 362 39 271 175 c +180 311 135 501 135 745 c +135 990 180 1180 271 1316 c +362 1452 489 1520 651 1520 c +814 1520 940 1452 1031 1316 c +1122 1180 1167 990 1167 745 c +1167 501 1122 311 1031 175 c +940 39 814 -29 651 -29 c + +ce} _d +/one{1303 0 250 0 1012 1520 sc +291 0 m +291 106 l +551 106 l +551 1348 l +250 1153 l +250 1284 l +614 1520 l +752 1520 l +752 106 l +1012 106 l +1012 0 l +291 0 l + +ce} _d +/two{1303 0 139 0 1102 1520 sc +262 1137 m +150 1137 l +150 1403 l +221 1441 293 1470 365 1490 c +438 1510 509 1520 578 1520 c +733 1520 856 1482 946 1407 c +1036 1332 1081 1229 1081 1100 c +1081 954 979 779 775 576 c +759 561 747 549 739 541 c +362 164 l +985 164 l +985 348 l +1102 348 l +1102 0 l +139 0 l +139 109 l +592 561 l +692 661 763 753 806 836 c +849 920 870 1008 870 1100 c +870 1201 844 1279 791 1336 c +739 1393 667 1421 575 1421 c +480 1421 406 1397 354 1350 c +302 1303 271 1232 262 1137 c + +ce} _d +/three{1303 0 
156 -29 1151 1520 sc +199 1430 m +277 1459 352 1482 423 1497 c +495 1512 562 1520 625 1520 c +771 1520 885 1488 967 1425 c +1049 1362 1090 1275 1090 1163 c +1090 1073 1062 998 1005 937 c +948 877 868 836 764 815 c +887 798 982 753 1049 681 c +1117 610 1151 517 1151 403 c +1151 264 1104 157 1010 82 c +917 8 782 -29 606 -29 c +528 -29 452 -21 377 -4 c +303 13 229 38 156 72 c +156 362 l +268 362 l +275 266 307 193 365 144 c +423 95 505 70 610 70 c +712 70 792 99 851 158 c +910 217 940 298 940 401 c +940 518 910 607 849 667 c +788 728 699 758 582 758 c +487 758 l +487 860 l +537 860 l +654 860 741 884 799 932 c +858 981 887 1054 887 1151 c +887 1238 863 1305 815 1351 c +767 1398 698 1421 608 1421 c +518 1421 448 1400 398 1357 c +349 1314 320 1251 311 1167 c +199 1167 l +199 1430 l + +ce} _d +/four{1303 0 63 0 1200 1520 sc +715 506 m +715 1300 l +205 506 l +715 506 l + +1155 0 m +475 0 l +475 106 l +715 106 l +715 399 l +63 399 l +63 508 l +717 1520 l +915 1520 l +915 506 l +1200 506 l +1200 399 l +915 399 l +915 106 l +1155 106 l +1155 0 l + +ce} _d +/five{1303 0 174 -29 1145 1493 sc +1030 1493 m +1030 1329 l +346 1329 l +346 901 l +381 925 421 943 467 955 c +514 967 566 973 623 973 c +784 973 912 928 1005 839 c +1098 750 1145 628 1145 473 c +1145 315 1098 192 1003 103 c +909 15 777 -29 606 -29 c +537 -29 467 -21 395 -4 c +323 13 249 38 174 72 c +174 362 l +287 362 l +293 267 323 195 377 145 c +432 95 508 70 606 70 c +711 70 792 105 849 174 c +906 243 934 343 934 473 c +934 602 906 701 849 770 c +793 839 712 874 606 874 c +546 874 493 863 447 842 c +402 821 361 788 326 743 c +240 743 l +240 1493 l +1030 1493 l + +ce} _d +/six{1303 0 137 -29 1174 1520 sc +670 70 m +764 70 836 104 887 173 c +938 242 963 342 963 471 c +963 600 938 699 887 768 c +836 837 764 872 670 872 c +575 872 502 839 452 772 c +402 705 377 609 377 483 c +377 350 402 248 453 177 c +504 106 576 70 670 70 c + +344 822 m +389 872 441 909 498 934 c +555 959 620 971 692 971 c +841 971 958 926 1044 837 c 
+1131 748 1174 626 1174 471 c +1174 320 1127 198 1034 107 c +941 16 817 -29 662 -29 c +493 -29 364 34 273 159 c +182 285 137 465 137 698 c +137 959 191 1162 298 1305 c +405 1448 557 1520 752 1520 c +805 1520 860 1515 918 1505 c +976 1495 1035 1480 1096 1460 c +1096 1214 l +983 1214 l +975 1281 949 1333 906 1368 c +863 1403 804 1421 731 1421 c +602 1421 505 1372 442 1274 c +379 1176 346 1025 344 822 c + +ce} _d +/eight{1303 0 137 -29 1165 1520 sc +954 408 m +954 515 927 597 874 656 c +821 715 747 745 651 745 c +555 745 480 715 427 656 c +374 597 348 515 348 408 c +348 301 374 217 427 158 c +480 99 555 70 651 70 c +747 70 821 99 874 158 c +927 217 954 301 954 408 c + +913 1133 m +913 1224 890 1294 844 1345 c +798 1396 734 1421 651 1421 c +569 1421 505 1396 458 1345 c +412 1294 389 1224 389 1133 c +389 1042 412 971 458 920 c +505 869 569 844 651 844 c +734 844 798 869 844 920 c +890 971 913 1042 913 1133 c + +805 795 m +918 780 1007 738 1070 669 c +1133 601 1165 514 1165 408 c +1165 268 1121 160 1032 84 c +943 9 816 -29 651 -29 c +486 -29 359 9 270 84 c +181 160 137 268 137 408 c +137 514 169 601 232 669 c +295 738 384 780 498 795 c +397 813 320 851 266 909 c +213 968 186 1042 186 1133 c +186 1253 227 1347 310 1416 c +393 1485 506 1520 651 1520 c +796 1520 909 1485 992 1416 c +1075 1347 1116 1253 1116 1133 c +1116 1042 1089 968 1035 909 c +982 851 905 813 805 795 c + +ce} _d +/K{1530 0 113 0 1561 1493 sc +113 0 m +113 106 l +303 106 l +303 1386 l +113 1386 l +113 1493 l +696 1493 l +696 1386 l +506 1386 l +506 821 l +1149 1386 l +987 1386 l +987 1493 l +1483 1493 l +1483 1386 l +1315 1386 l +674 823 l +1391 106 l +1561 106 l +1561 0 l +1214 0 l +506 709 l +506 106 l +696 106 l +696 0 l +113 0 l + +ce} _d +/N{1792 0 100 -29 1702 1493 sc +100 0 m +100 106 l +301 106 l +301 1386 l +100 1386 l +100 1493 l +483 1493 l +1378 315 l +1378 1386 l +1178 1386 l +1178 1493 l +1702 1493 l +1702 1386 l +1501 1386 l +1501 -29 l +1380 -29 l +424 1229 l +424 106 l +625 106 l +625 0 l 
+100 0 l + +ce} _d +/S{1403 0 172 -29 1253 1520 sc +190 72 m +190 412 l +305 411 l +308 298 341 214 403 159 c +466 105 561 78 688 78 c +807 78 897 101 959 148 c +1022 195 1053 264 1053 354 c +1053 426 1034 481 996 520 c +959 559 879 596 758 633 c +561 692 l +418 735 318 789 259 854 c +201 919 172 1007 172 1120 c +172 1247 217 1345 307 1415 c +397 1485 523 1520 686 1520 c +755 1520 831 1512 914 1497 c +997 1482 1085 1461 1178 1432 c +1178 1114 l +1065 1114 l +1054 1219 1018 1295 959 1342 c +900 1389 811 1413 690 1413 c +585 1413 504 1391 449 1348 c +394 1305 367 1243 367 1161 c +367 1090 388 1034 429 993 c +470 952 558 912 692 872 c +877 817 l +1012 776 1109 724 1166 661 c +1224 598 1253 514 1253 408 c +1253 263 1207 154 1114 81 c +1021 8 883 -29 700 -29 c +618 -29 534 -21 449 -4 c +364 13 278 38 190 72 c + +ce} _d +/T{1366 0 20 0 1346 1493 sc +391 0 m +391 106 l +582 106 l +582 1374 l +143 1374 l +143 1141 l +20 1141 l +20 1493 l +1346 1493 l +1346 1141 l +1223 1141 l +1223 1374 l +784 1374 l +784 106 l +975 106 l +975 0 l +391 0 l + +ce} _d +/multiply{1716 0 283 68 1434 1217 sc +1434 1104 m +971 641 l +1434 180 l +1319 68 l +858 528 l +397 68 l +283 180 l +743 641 l +283 1104 l +397 1217 l +858 756 l +1319 1217 l +1434 1104 l + +ce} _d +/a{1221 0 102 -29 1163 1092 sc +815 334 m +815 559 l +578 559 l +487 559 419 539 374 500 c +329 461 307 400 307 319 c +307 245 330 186 375 143 c +420 100 482 78 559 78 c +636 78 697 102 744 149 c +791 196 815 258 815 334 c + +999 664 m +999 106 l +1163 106 l +1163 0 l +815 0 l +815 115 l +774 66 727 29 674 6 c +621 -17 558 -29 487 -29 c +369 -29 275 2 206 65 c +137 128 102 212 102 319 c +102 429 142 514 221 575 c +300 636 412 666 557 666 c +815 666 l +815 739 l +815 820 790 882 741 926 c +692 971 624 993 535 993 c +462 993 403 976 360 943 c +317 910 290 860 279 795 c +184 795 l +184 1010 l +248 1037 310 1058 370 1071 c +431 1085 490 1092 547 1092 c +694 1092 806 1055 883 982 c +960 909 999 803 999 664 c + +ce} _d +/b{1311 0 59 -29 
1208 1556 sc +236 106 m +236 1450 l +59 1450 l +59 1556 l +420 1556 l +420 897 l +456 964 502 1013 557 1044 c +613 1076 682 1092 764 1092 c +895 1092 1001 1040 1084 937 c +1167 834 1208 699 1208 532 c +1208 365 1167 230 1084 126 c +1001 23 895 -29 764 -29 c +682 -29 613 -13 557 18 c +502 50 456 99 420 166 c +420 0 l +59 0 l +59 106 l +236 106 l + +420 479 m +420 351 444 253 493 186 c +542 119 614 86 707 86 c +801 86 872 124 920 199 c +969 274 993 385 993 532 c +993 679 969 790 920 865 c +872 940 801 977 707 977 c +614 977 542 943 493 876 c +444 809 420 711 420 584 c +420 479 l + +ce} _d +/e{1212 0 102 -29 1110 1092 sc +1110 512 m +317 512 l +317 504 l +317 361 344 252 398 179 c +452 106 532 70 637 70 c +718 70 784 91 835 133 c +887 176 923 239 944 322 c +1092 322 l +1063 205 1008 118 929 59 c +850 0 747 -29 618 -29 c +463 -29 338 22 243 124 c +149 227 102 363 102 532 c +102 700 148 835 241 938 c +334 1041 455 1092 606 1092 c +767 1092 890 1042 976 943 c +1062 844 1107 701 1110 512 c + +893 618 m +889 742 863 835 814 898 c +766 961 697 993 606 993 c +521 993 455 961 406 898 c +357 835 328 741 317 618 c +893 618 l + +ce} _d +/f{758 0 74 0 881 1556 sc +881 1305 m +784 1305 l +783 1355 769 1393 741 1419 c +714 1445 674 1458 621 1458 c +552 1458 504 1439 476 1401 c +448 1364 434 1297 434 1200 c +434 1063 l +731 1063 l +731 956 l +434 956 l +434 106 l +670 106 l +670 0 l +74 0 l +74 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +250 1063 l +250 1196 l +250 1315 281 1404 342 1465 c +404 1526 495 1556 614 1556 c +659 1556 703 1552 748 1544 c +793 1536 837 1524 881 1507 c +881 1305 l + +ce} _d +/g{1311 0 102 -455 1251 1092 sc +1075 956 m +1075 23 l +1075 -130 1033 -247 949 -330 c +865 -413 745 -455 590 -455 c +520 -455 453 -449 389 -436 c +325 -423 264 -404 205 -379 c +205 -156 l +301 -156 l +313 -225 341 -276 386 -308 c +431 -340 495 -356 578 -356 c +686 -356 765 -325 815 -264 c +866 -203 891 -108 891 23 c +891 166 l +855 99 809 50 753 18 c +698 -13 629 -29 547 -29 c 
+416 -29 309 23 226 126 c +143 230 102 365 102 532 c +102 699 143 834 226 937 c +309 1040 416 1092 547 1092 c +629 1092 698 1076 753 1044 c +809 1013 855 964 891 897 c +891 1063 l +1251 1063 l +1251 956 l +1075 956 l + +891 584 m +891 711 866 809 817 876 c +768 943 697 977 604 977 c +509 977 438 940 389 865 c +341 790 317 679 317 532 c +317 385 341 274 389 199 c +438 124 509 86 604 86 c +697 86 768 119 817 186 c +866 253 891 351 891 479 c +891 584 l + +ce} _d +/i{655 0 74 0 608 1507 sc +199 1393 m +199 1424 210 1450 232 1473 c +255 1496 282 1507 313 1507 c +344 1507 370 1496 392 1473 c +415 1450 426 1424 426 1393 c +426 1362 415 1335 393 1313 c +371 1291 344 1280 313 1280 c +282 1280 255 1291 232 1313 c +210 1335 199 1362 199 1393 c + +434 106 m +608 106 l +608 0 l +74 0 l +74 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +434 1063 l +434 106 l + +ce} _d +/l{655 0 59 0 594 1556 sc +420 106 m +594 106 l +594 0 l +59 0 l +59 106 l +236 106 l +236 1450 l +59 1450 l +59 1556 l +420 1556 l +420 106 l + +ce} _d +/m{1942 0 74 0 1886 1092 sc +1061 856 m +1096 934 1141 993 1196 1032 c +1251 1072 1316 1092 1389 1092 c +1500 1092 1583 1057 1638 988 c +1693 919 1720 815 1720 676 c +1720 106 l +1886 106 l +1886 0 l +1376 0 l +1376 106 l +1536 106 l +1536 655 l +1536 764 1520 841 1488 887 c +1456 933 1403 956 1329 956 c +1247 956 1184 925 1141 863 c +1098 801 1077 711 1077 592 c +1077 106 l +1237 106 l +1237 0 l +733 0 l +733 106 l +893 106 l +893 662 l +893 768 877 843 845 888 c +813 933 760 956 686 956 c +604 956 541 925 498 863 c +455 801 434 711 434 592 c +434 106 l +594 106 l +594 0 l +84 0 l +84 106 l +250 106 l +250 958 l +74 958 l +74 1063 l +434 1063 l +434 874 l +468 945 511 999 564 1036 c +617 1073 676 1092 743 1092 c +826 1092 895 1071 950 1030 c +1005 989 1042 931 1061 856 c + +ce} _d +/n{1319 0 74 0 1262 1092 sc +84 0 m +84 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +434 1063 l +434 874 l +468 946 512 1000 566 1037 c +621 1074 684 1092 756 1092 c +873 1092 
960 1058 1015 991 c +1070 924 1098 819 1098 676 c +1098 106 l +1262 106 l +1262 0 l +754 0 l +754 106 l +913 106 l +913 618 l +913 748 897 837 865 885 c +833 934 777 958 696 958 c +611 958 546 927 501 864 c +456 802 434 711 434 592 c +434 106 l +594 106 l +594 0 l +84 0 l + +ce} _d +/o{1233 0 102 -29 1130 1092 sc +616 70 m +715 70 789 109 839 187 c +890 265 915 380 915 532 c +915 684 890 799 839 876 c +789 954 715 993 616 993 c +517 993 443 954 392 876 c +342 799 317 684 317 532 c +317 380 342 265 393 187 c +444 109 518 70 616 70 c + +616 -29 m +461 -29 337 22 243 124 c +149 227 102 363 102 532 c +102 701 149 837 242 939 c +336 1041 461 1092 616 1092 c +771 1092 896 1041 989 939 c +1083 837 1130 701 1130 532 c +1130 363 1083 227 989 124 c +896 22 771 -29 616 -29 c + +ce} _d +/p{1311 0 59 -426 1208 1092 sc +420 584 m +420 479 l +420 351 444 253 493 186 c +542 119 614 86 707 86 c +801 86 872 124 920 199 c +969 274 993 385 993 532 c +993 679 969 790 920 865 c +872 940 801 977 707 977 c +614 977 542 943 493 876 c +444 809 420 711 420 584 c + +236 956 m +59 956 l +59 1063 l +420 1063 l +420 897 l +456 964 502 1013 557 1044 c +613 1076 682 1092 764 1092 c +895 1092 1001 1040 1084 937 c +1167 834 1208 699 1208 532 c +1208 365 1167 230 1084 126 c +1001 23 895 -29 764 -29 c +682 -29 613 -13 557 18 c +502 50 456 99 420 166 c +420 -319 l +594 -319 l +594 -426 l +59 -426 l +59 -319 l +236 -319 l +236 956 l + +ce} _d +/r{979 0 74 0 979 1092 sc +979 1065 m +979 799 l +873 799 l +870 852 855 891 829 917 c +803 943 765 956 715 956 c +624 956 555 925 506 862 c +458 799 434 709 434 592 c +434 106 l +647 106 l +647 0 l +84 0 l +84 106 l +250 106 l +250 958 l +74 958 l +74 1063 l +434 1063 l +434 874 l +470 948 516 1003 573 1038 c +630 1074 699 1092 780 1092 c +810 1092 841 1090 874 1085 c +907 1080 942 1074 979 1065 c + +ce} _d +/s{1051 0 115 -29 946 1092 sc +115 59 m +115 307 l +221 307 l +224 228 248 168 295 129 c +342 90 412 70 504 70 c +587 70 650 85 693 116 c +736 147 758 193 
758 252 c +758 299 742 336 710 365 c +679 394 612 424 510 457 c +377 502 l +286 531 219 568 178 612 c +137 656 117 712 117 780 c +117 877 153 954 224 1009 c +295 1064 394 1092 520 1092 c +576 1092 635 1085 697 1070 c +759 1055 823 1034 889 1006 c +889 774 l +783 774 l +780 843 756 896 711 935 c +666 974 604 993 526 993 c +449 993 390 979 350 952 c +311 925 291 884 291 829 c +291 784 306 748 336 721 c +366 694 426 667 516 639 c +662 594 l +763 563 835 523 879 476 c +924 429 946 369 946 295 c +946 194 907 115 830 57 c +753 -0 647 -29 512 -29 c +443 -29 376 -22 311 -7 c +246 8 180 30 115 59 c + +ce} _d +/u{1319 0 55 -29 1243 1063 sc +725 1063 m +1069 1063 l +1069 106 l +1243 106 l +1243 0 l +885 0 l +885 188 l +851 117 807 63 753 26 c +699 -11 636 -29 565 -29 c +447 -29 360 4 304 71 c +249 138 221 244 221 387 c +221 956 l +55 956 l +55 1063 l +406 1063 l +406 444 l +406 315 422 226 453 178 c +485 130 542 106 623 106 c +708 106 773 137 818 200 c +863 263 885 354 885 473 c +885 956 l +725 956 l +725 1063 l + +ce} _d +/y{1157 0 -6 -455 1151 1063 sc +442 -195 m +512 -18 l +115 956 l +-6 956 l +-6 1063 l +483 1063 l +483 956 l +313 956 l +612 225 l +911 956 l +752 956 l +752 1063 l +1151 1063 l +1151 956 l +1032 956 l +545 -240 l +512 -323 475 -379 434 -409 c +393 -440 336 -455 262 -455 c +231 -455 198 -452 165 -447 c +132 -442 99 -434 66 -424 c +66 -221 l +160 -221 l +164 -266 175 -299 194 -318 c +213 -338 243 -348 283 -348 c +320 -348 349 -338 371 -317 c +394 -297 417 -256 442 -195 c + +ce} _d +end readonly def + +/BuildGlyph { + exch begin + CharStrings exch + 2 copy known not {pop /.notdef} if + true 3 1 roll get exec + end +} _d + +/BuildChar { + 1 index /Encoding get exch get + 1 index /BuildGlyph get exec +} _d + +FontName currentdict end definefont pop +end +%%EndProlog +mpldict begin +75.6 223.2 translate +460.8 345.6 0 0 clipbox +gsave +0 0 m +460.8 0 l +460.8 345.6 l +0 345.6 l +cl +1.000 setgray +fill +grestore +gsave +108.312798 56.796 m +433.392671 56.796 l 
+433.392671 330.048 l +108.312798 330.048 l +cl +1.000 setgray +fill +grestore +0.800 setlinewidth +1 setlinejoin +1 setlinecap +[] 0 setdash +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +108.312798 56.796 m +108.312798 330.048 l +stroke +grestore +0.150 setgray +gsave +96.3128 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/one glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +173.328772 56.796 m +173.328772 330.048 l +stroke +grestore +0.150 setgray +gsave +161.329 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/two glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +238.344747 56.796 m +238.344747 330.048 l +stroke +grestore +0.150 setgray +gsave +226.345 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/three glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +303.360722 56.796 m +303.360722 330.048 l +stroke +grestore +0.150 setgray +gsave +291.361 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/four glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +368.376696 56.796 m +368.376696 330.048 l +stroke +grestore +0.150 setgray +gsave +356.377 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.635938 moveto +/one glyphshow +8.43005 0.635938 moveto +/zero glyphshow +/DejaVuSerif 9.24 
selectfont +16.9782 5.36094 moveto +/five glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +433.392671 56.796 m +433.392671 330.048 l +stroke +grestore +0.150 setgray +gsave +421.393 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/six glyphshow +grestore +/DejaVuSerif 14.400 selectfont +gsave + +198.384 20.8554 translate +0 rotate +0 0 m /N glyphshow +12.5781 0 m /u glyphshow +21.8362 0 m /m glyphshow +35.4672 0 m /b glyphshow +44.6692 0 m /e glyphshow +53.1763 0 m /r glyphshow +60.0479 0 m /space glyphshow +64.6173 0 m /o glyphshow +73.2718 0 m /f glyphshow +78.5922 0 m /space glyphshow +83.1616 0 m /S glyphshow +93.0093 0 m /a glyphshow +101.58 0 m /m glyphshow +115.211 0 m /p glyphshow +124.413 0 m /l glyphshow +129.01 0 m /e glyphshow +137.517 0 m /s glyphshow +grestore +gsave +33.7128 66.0988 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.2668 0.515625 moveto +/two glyphshow +23.2783 0.515625 moveto +/multiply glyphshow +36.9618 0.515625 moveto +/one glyphshow +45.3918 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.94 5.24062 moveto +/minus glyphshow +61.7115 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 113.977 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/four glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 158.021 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto 
+/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/six glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +gsave +33.7128 198.799 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.1418 0.515625 moveto +/eight glyphshow +23.1533 0.515625 moveto +/multiply glyphshow +36.8368 0.515625 moveto +/one glyphshow +45.2668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.815 5.24062 moveto +/minus glyphshow +61.5865 5.24062 moveto +/two glyphshow +grestore +gsave +45.7128 236.763 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +11.0115 0.515625 moveto +/multiply glyphshow +24.6949 0.515625 moveto +/one glyphshow +33.125 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +41.6732 5.24062 moveto +/minus glyphshow +49.4446 5.24062 moveto +/two glyphshow +grestore +gsave +33.7128 272.275 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.2668 0.515625 moveto +/two glyphshow +23.2783 0.515625 moveto +/multiply glyphshow +36.9618 0.515625 moveto +/one glyphshow +45.3918 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.94 5.24062 moveto +/minus glyphshow +61.7115 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 305.634 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/four glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 
9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +/DejaVuSerif 14.400 selectfont +gsave + +25.6034 141.984 translate +90 rotate +0 0 m /T glyphshow +9.58801 0 m /r glyphshow +16.4597 0 m /a glyphshow +25.0299 0 m /i glyphshow +29.6274 0 m /n glyphshow +38.8855 0 m /i glyphshow +43.483 0 m /n glyphshow +52.7411 0 m /g glyphshow +61.9431 0 m /space glyphshow +66.5125 0 m /T glyphshow +76.1005 0 m /i glyphshow +80.6979 0 m /m glyphshow +94.3289 0 m /e glyphshow +grestore +1.200 setlinewidth +0.122 0.467 0.706 setrgbcolor +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 310.291969 m +stroke +grestore +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 302.547354 m +283.788963 317.627455 l +stroke +grestore +0 setlinecap +[4.8 1.8] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 142.738982 m +stroke +grestore +1 setlinecap +[] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 134.430515 m +283.788963 151.743921 l +stroke +grestore +0 setlinecap +[1.2 1.2] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 87.537625 m +stroke +grestore +1 setlinecap +[] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 69.216545 m +283.788963 114.754776 l +stroke +grestore +1.000 setlinewidth +0 setlinejoin +2 setlinecap +0.800 setgray +gsave +108.312798 56.796 m +108.312798 330.048 l +stroke +grestore +gsave +433.392671 56.796 m +433.392671 330.048 l +stroke +grestore +gsave +108.312798 56.796 m +433.392671 56.796 l +stroke +grestore +gsave +108.312798 330.048 m +433.392671 330.048 l +stroke +grestore +0.800 setlinewidth +0 setlinecap +gsave +342.834546 241.61925 m +424.152671 241.61925 l +425.912671 241.61925 426.792671 242.49925 426.792671 244.25925 c +426.792671 320.808 l +426.792671 322.568 425.912671 323.448 424.152671 323.448 c +342.834546 323.448 l +341.074546 323.448 340.194546 322.568 340.194546 320.808 c +340.194546 244.25925 l 
+340.194546 242.49925 341.074546 241.61925 342.834546 241.61925 c +cl +gsave +1.000 setgray +fill +grestore +stroke +grestore +0.150 setgray +/DejaVuSerif 14.400 selectfont +gsave + +359.431 307.527 translate +0 rotate +0 0 m /K glyphshow +10.3641 0 m /e glyphshow +18.8712 0 m /r glyphshow +25.7429 0 m /n glyphshow +35.001 0 m /e glyphshow +43.5081 0 m /l glyphshow +grestore +1.200 setlinewidth +1 setlinejoin +1 setlinecap +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 292.766125 m +358.674546 292.766125 l +371.874546 292.766125 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 288.146 translate +0 rotate +0 0 m /r glyphshow +6.33386 0 m /b glyphshow +14.8157 0 m /f glyphshow +grestore +0 setlinecap +[4.8 1.8] 0 setdash +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 273.588 m +358.674546 273.588 l +371.874546 273.588 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 268.968 translate +0 rotate +0 0 m /p glyphshow +8.48181 0 m /o glyphshow +16.459 0 m /l glyphshow +20.6967 0 m /y glyphshow +grestore +[1.2 1.2] 0 setdash +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 254.222375 m +358.674546 254.222375 l +371.874546 254.222375 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 249.602 translate +0 rotate +0 0 m /l glyphshow +4.23767 0 m /i glyphshow +8.47534 0 m /n glyphshow +17.0089 0 m /e glyphshow +24.8502 0 m /a glyphshow +32.7498 0 m /r glyphshow +grestore + +end +showpage diff --git a/examples/security/kdd-nsl/retrain.py b/examples/security/kdd-nsl/retrain.py index a7dbac4f..83b398aa 100644 --- a/examples/security/kdd-nsl/retrain.py +++ b/examples/security/kdd-nsl/retrain.py @@ -237,7 +237,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: results = pd.read_csv("output/train.csv") # Some convenient variable names # input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"] 
-results["Kernel"] = results["model.init.kwargs.kernel"].copy() +results["Kernel"] = results["model.init.kernel"].copy() # results["Features"] = results["data.generate.kwargs.n_features"].copy() # results["Samples"] = results["data.sample.train_size"].copy() # results["input_size"] = input_size @@ -310,8 +310,11 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: "r", ) as f: probs = json.load(f) - probs = np.array(probs) - false_confidence = y_test[: len(probs)] - probs[:, 1] + probs = np.squeeze(np.array(probs)) + # take only the second column + if len(probs.shape) > 1: + probs = probs[:, 1] + false_confidence = y_test[: len(probs)] - probs[:] avg_prob = np.mean(false_confidence) with open( Path("output/reports/attack", folder, "score_dict.json"), @@ -341,7 +344,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: params = json.load(f) else: raise ValueError(f"No params file found for {folder}") - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name confidence_ser["Average False Confidence"] = avg_prob @@ -392,7 +395,12 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: ) as f: probs = json.load(f) probs = np.array(probs) - false_confidence = y_test[: len(probs)] - probs[:, 1] + if len(probs.shape) > 1: + probs = np.squeeze(probs) + probs = probs[:, 1] + else: + probs = np.squeeze(probs) + false_confidence = y_test[: len(probs)] - probs avg_prob = np.mean(false_confidence) pd.DataFrame(probs).to_csv( Path( @@ -429,7 +437,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: else: logger.warning(f"No params file found for {folder}") continue - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name 
confidence_ser["Average False Confidence After Retraining"] = avg_prob diff --git a/examples/security/truthseeker/.gitignore b/examples/security/truthseeker/.gitignore index b12c2563..ff637185 100644 --- a/examples/security/truthseeker/.gitignore +++ b/examples/security/truthseeker/.gitignore @@ -2,3 +2,4 @@ logs/ multirun/ output/ models/ +/retrain diff --git a/examples/security/truthseeker/attacks.sh b/examples/security/truthseeker/attacks.sh index 76ed02bc..ccbb0574 100644 --- a/examples/security/truthseeker/attacks.sh +++ b/examples/security/truthseeker/attacks.sh @@ -11,7 +11,7 @@ for model_config in $CONFIG_NAMES; do continue fi HYDRA_FULL_ERROR=1 python -m deckard.layers.optimise \ - ++model.init.kernel=kernel_name \ + ++model.init.kernel=${kernel_name} \ ++stage=attack \ ++attack.init.name=art.attacks.evasion.ProjectedGradientDescent \ ++attack.init.norm=1,2,inf \ diff --git a/examples/security/truthseeker/dvc.lock b/examples/security/truthseeker/dvc.lock index f3ba1d0a..0945b506 100644 --- a/examples/security/truthseeker/dvc.lock +++ b/examples/security/truthseeker/dvc.lock @@ -94,39 +94,39 @@ stages: outs: - path: output/reports/train/default/params.yaml hash: md5 - md5: 7234aab7d5edae504afa2090d96e4c3f - size: 2434 + md5: 6225c0aefe4059bfae7f5b0e04ae549a + size: 2189 - path: output/reports/train/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/score_dict.json hash: md5 - md5: 1b659aed969c2f3dbd29681d381ce1d0 - size: 360 + md5: 82b8ad9524a1b60f5cbdf4937870888b + size: 717 attack: cmd: python -m deckard.layers.experiment attack deps: - path: output/reports/train/default/params.yaml hash: md5 - md5: 7234aab7d5edae504afa2090d96e4c3f - size: 2434 + md5: 
6225c0aefe4059bfae7f5b0e04ae549a + size: 2189 - path: output/reports/train/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/train/default/score_dict.json hash: md5 - md5: 1b659aed969c2f3dbd29681d381ce1d0 - size: 360 + md5: 82b8ad9524a1b60f5cbdf4937870888b + size: 717 params: params.yaml: attack: @@ -315,32 +315,32 @@ stages: outs: - path: output/attacks/attack.pkl hash: md5 - md5: 2b7587aefdfa486e84fb3c4ccb5f640c + md5: 444495650bb1e76bae90cbb99153f824 size: 1832 - path: output/reports/attack/default/adv_predictions.json hash: md5 - md5: 18482a5b7773de281dc9e127a6febf98 - size: 438 + md5: 9878cc54791c7354cb668af97e66079a + size: 700 - path: output/reports/attack/default/adv_probabilities.json hash: md5 - md5: 18482a5b7773de281dc9e127a6febf98 - size: 438 + md5: 9878cc54791c7354cb668af97e66079a + size: 700 - path: output/reports/attack/default/params.yaml hash: md5 - md5: b300c684dc58fc23684ccefbb9f83265 - size: 5832 + md5: 3aa13a2e1e66b911f66d9bd8a8823369 + size: 5310 - path: output/reports/attack/default/predictions.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/attack/default/probabilities.json hash: md5 - md5: 7e3dec7b2d06af151bf81addc33fba5a - size: 44061 + md5: 3c5089245ae71f1b860304a02a224078 + size: 70072 - path: output/reports/attack/default/score_dict.json hash: md5 - md5: fe6164548c98534ee88f439f91a5151a - size: 585 + md5: 04f78e33b2894f630875ad3c6412a5ff + size: 1238 models: cmd: bash other_data.sh +stage=train --config-name=model.yaml deps: @@ -448,53 +448,54 @@ stages: outs: - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 
8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 compile_models: cmd: python -m deckard.layers.compile --report_folder output/reports/train/ --results_file output/train.csv deps: - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 - path: output/reports/train/ hash: md5 - md5: 0f4c497909d988c75851e5e56a440b89.dir - size: 42005082 - nfiles: 1637 + md5: c4c5ab1d22c12d150cf53a3b630e8442.dir + size: 10780144 + nfiles: 312 outs: - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 find_best_model@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_rbf --study_name=rbf --default_config model.yaml + --params_file best_rbf --study_name=rbf --default_config default.yaml --storage_name + sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/model/best_rbf.yaml hash: md5 @@ -502,21 +503,22 @@ stages: size: 359 find_best_model@linear: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_linear --study_name=linear --default_config model.yaml + --params_file best_linear 
--study_name=linear --default_config default.yaml + --storage_name sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/model/best_linear.yaml hash: md5 @@ -524,26 +526,27 @@ stages: size: 330 find_best_model@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model - --params_file best_poly --study_name=poly --default_config model.yaml + --params_file best_poly --study_name=poly --default_config default.yaml --storage_name + sqlite:///model.db deps: - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 307b98679bd448826190d15d2c48db7b + size: 369 attacks: cmd: bash attacks.sh ++stage=attack --config-name=attack.yaml deps: @@ -553,34 +556,34 @@ stages: size: 330 - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 307b98679bd448826190d15d2c48db7b + size: 369 - path: conf/model/best_rbf.yaml hash: md5 md5: 4932ceac75d6256ce2a7864aa4a5ea3c size: 359 - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 
8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: model.db hash: md5 - md5: 0b595e029e8e9d6e99c3da6511906eb7 - size: 778240 + md5: f283988890339a1e01b295d97ca2f929 + size: 155648 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: attack.db hash: md5 - md5: 32b63718640047c18ed7bb1aff484595 - size: 389120 + md5: 7c78ffc40aedba8c75061fdf40fdf315 + size: 208896 - path: logs/attacks/ hash: md5 - md5: 61801da5096fd94a88d69f6de5be2413.dir - size: 3180296 + md5: f9bd73b81f44394d16d6bc194c85fb14.dir + size: 420089 nfiles: 3 compile_attacks: cmd: python -m deckard.layers.compile --report_folder output/reports/attack/ --results_file @@ -588,89 +591,92 @@ stages: deps: - path: attack.db hash: md5 - md5: 32b63718640047c18ed7bb1aff484595 - size: 389120 + md5: 7c78ffc40aedba8c75061fdf40fdf315 + size: 208896 - path: logs/attacks/ hash: md5 - md5: 61801da5096fd94a88d69f6de5be2413.dir - size: 3180296 + md5: f9bd73b81f44394d16d6bc194c85fb14.dir + size: 420089 nfiles: 3 - path: output/reports/attack/ hash: md5 - md5: 84a4553074e952b76f6a4f228dddbb47.dir - size: 29299858 - nfiles: 1968 + md5: 11465f27296c17a8863dcc4bcea9eb22.dir + size: 20702813 + nfiles: 1093 outs: - path: output/attack.csv hash: md5 - md5: 188c5eda3a172c9a30808781f429aed4 - size: 703053 + md5: 490f9a3401c509d62c0b293ffa634a65 + size: 503235 find_best_attack@linear: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_linear --study_name=best_linear --default_config attack.yaml + --params_file best_linear --study_name=best_linear --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 32b63718640047c18ed7bb1aff484595 - size: 389120 + md5: 7c78ffc40aedba8c75061fdf40fdf315 + size: 208896 - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 
357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/attack/best_linear.yaml hash: md5 - md5: df65ae18996a57abebd38df98db37edb - size: 245 + md5: 3b770eef3005669fb6c893dc239337c1 + size: 248 find_best_attack@rbf: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_rbf --study_name=best_rbf --default_config attack.yaml + --params_file best_rbf --study_name=best_rbf --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 32b63718640047c18ed7bb1aff484595 - size: 389120 + md5: 7c78ffc40aedba8c75061fdf40fdf315 + size: 208896 - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/attack/best_rbf.yaml hash: md5 - md5: 9871a9d8d50ef211c7f0ae884bb39fe4 - size: 247 + md5: 78076d6ff4a3f2f5ec4e550db50b759f + size: 245 find_best_attack@poly: cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack - --params_file best_poly --study_name=best_poly --default_config attack.yaml + --params_file best_poly --study_name=best_poly --default_config default.yaml + --storage_name sqlite:///attack.db --direction minimize deps: - path: attack.db hash: md5 - md5: 32b63718640047c18ed7bb1aff484595 - size: 389120 + md5: 7c78ffc40aedba8c75061fdf40fdf315 + size: 208896 - path: logs/models/ hash: md5 - md5: f7c1d4ea5ab2d8cc5d5214e2f7b4e149.dir - size: 357091 + md5: 8e67f43a680648ecc549525d90f55662.dir + size: 202043 nfiles: 3 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - 
size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 outs: - path: conf/attack/best_poly.yaml hash: md5 - md5: d4c4945873617b0652018e6f27e52b89 - size: 247 + md5: 5355e960ee2cab726da8da4f761746b5 + size: 248 other_data_train@kdd_nsl: cmd: DATASET_NAME=kdd_nsl bash other_data.sh data=kdd_nsl +stage=train --config-name=model.yaml deps: @@ -706,93 +712,94 @@ stages: deps: - path: conf/attack/best_linear.yaml hash: md5 - md5: df65ae18996a57abebd38df98db37edb - size: 245 + md5: 3b770eef3005669fb6c893dc239337c1 + size: 248 - path: conf/attack/best_poly.yaml hash: md5 - md5: d4c4945873617b0652018e6f27e52b89 - size: 247 + md5: 5355e960ee2cab726da8da4f761746b5 + size: 248 - path: conf/attack/best_rbf.yaml hash: md5 - md5: 9871a9d8d50ef211c7f0ae884bb39fe4 - size: 247 + md5: 78076d6ff4a3f2f5ec4e550db50b759f + size: 245 - path: conf/model/best_linear.yaml hash: md5 md5: e4ae7059114d8724d4947e952145d4fe size: 330 - path: conf/model/best_poly.yaml hash: md5 - md5: 12f892f3ba4ef8bab095b36bd7558d3e - size: 372 + md5: 307b98679bd448826190d15d2c48db7b + size: 369 - path: conf/model/best_rbf.yaml hash: md5 md5: 4932ceac75d6256ce2a7864aa4a5ea3c size: 359 - path: output/attacks/ hash: md5 - md5: cde8aa6baa7c2646a1fc09ea3956b5e6.dir - size: 327928 - nfiles: 179 - - path: output/models/ - hash: md5 - md5: 420131f3b75400bb25e03920f359494a.dir - size: 2326552 - nfiles: 272 + md5: b66feb7848ca1405dfb53b0aa2f6ca1e.dir + size: 2036072 + nfiles: 121 outs: - path: plots/after_retrain_confidence.csv hash: md5 - md5: 6818046e86115df423cf15e24a43536f - size: 52143 + md5: 73b389e63f70f94899b8c3d6d3c97bcd + size: 394238 - path: plots/before_retrain_confidence.csv hash: md5 - md5: d479df2e41303c4466ff8f9218d0fe66 - size: 52126 + md5: 9ee0eafdd6ba1764ae7f31f5856fe164 + size: 394221 - path: retrain/ hash: md5 - md5: 2360b46dfe437da0aff771c4522c37eb.dir - size: 174505 + md5: 19310315f07f04e7842f59c9df05db78.dir + size: 176116 nfiles: 12 plots: cmd: python plots.py deps: - path: 
output/attack.csv hash: md5 - md5: 188c5eda3a172c9a30808781f429aed4 - size: 703053 + md5: 490f9a3401c509d62c0b293ffa634a65 + size: 503235 - path: output/train.csv hash: md5 - md5: 348d49dcbf81f9db4f7abb76fcc2f06e - size: 598748 + md5: 5290b41fa9349727642757688378dec0 + size: 152670 + - path: plots.py + hash: md5 + md5: f1f73855e466a5f38128b4123f7bd186 + size: 10155 - path: plots/after_retrain_confidence.csv hash: md5 - md5: 6818046e86115df423cf15e24a43536f - size: 52143 + md5: 73b389e63f70f94899b8c3d6d3c97bcd + size: 394238 - path: plots/before_retrain_confidence.csv hash: md5 - md5: d479df2e41303c4466ff8f9218d0fe66 - size: 52126 + md5: 9ee0eafdd6ba1764ae7f31f5856fe164 + size: 394221 outs: - - path: plots/accuracy_vs_attack_parameters.pdf + - path: plots/accuracy_vs_attack_parameters.eps hash: md5 - md5: 9a97f9f585f99c7794818b8fa38ac311 - size: 15792 - - path: plots/confidence_vs_attack_parameters.pdf + md5: aa706c0ecf286ccbebf168f078a29d75 + size: 39185 + - path: plots/confidence_vs_attack_parameters.eps hash: md5 - md5: 65d58bfd40e40bea5e9114c84e353ea2 - size: 17506 - - path: plots/retrain_accuracy.pdf + md5: a77acb08b4c7bfa4ad937b6a085b9eed + size: 41336 + - path: plots/retrain_accuracy.eps hash: md5 - md5: 577e89d46eb6f2446d0a3ed83b4f9e19 - size: 13913 - - path: plots/retrain_confidence_vs_attack_parameters.pdf + md5: 106ffdb6d70899f23fc71927e5029133 + size: 30830 + - path: plots/retrain_confidence_vs_attack_parameters.eps hash: md5 - md5: e1fa2d6ebd91b406426215c07d9df11a - size: 18683 - - path: plots/retrain_time.pdf + md5: 002bd002f2e020dadcc8cc18bacbe13f + size: 41837 + - path: plots/retrain_time.eps hash: md5 - md5: d48a53f11dd9db3b30b9382e3404963d - size: 12916 - - path: plots/train_time_vs_attack_parameters.pdf + md5: 9fcacfebf8617111de7d546b788ba83f + size: 28365 + - path: plots/train_time_vs_attack_parameters.eps hash: md5 - md5: f0a52d3088d3b90f7d6e157b87e6fc5a - size: 17167 + md5: 22fa5b3a2e2b5d8b532a59415484223b + size: 39894 + move_files: + cmd: cp 
-r ./plots/* ~/KDD-Paper-EAI-AISEC/truthseeker/ && rm ~/KDD-Paper-EAI-AISEC/truthseeker/.gitignore diff --git a/examples/security/truthseeker/dvc.yaml b/examples/security/truthseeker/dvc.yaml index 6b6c8962..0794289c 100644 --- a/examples/security/truthseeker/dvc.yaml +++ b/examples/security/truthseeker/dvc.yaml @@ -73,7 +73,7 @@ stages: - rbf - poly do: - cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config model.yaml + cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir model --params_file best_${item} --study_name=${item} --default_config default.yaml --storage_name sqlite:///model.db outs: - conf/model/best_${item}.yaml deps: @@ -111,7 +111,7 @@ stages: - rbf - poly do: - cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config attack.yaml + cmd: python -m deckard.layers.find_best --config_folder conf --config_subdir attack --params_file best_${item} --study_name=best_${item} --default_config default.yaml --storage_name sqlite:///attack.db --direction minimize outs: - conf/attack/best_${item}.yaml deps: @@ -121,7 +121,6 @@ stages: retrain: cmd : python retrain.py deps: - - ${files.directory}/models/ - ${files.directory}/attacks/ - conf/attack/best_linear.yaml - conf/attack/best_rbf.yaml @@ -141,14 +140,18 @@ stages: - output/attack.csv - plots/before_retrain_confidence.csv - output/train.csv + - plots.py plots : - - plots/accuracy_vs_attack_parameters.pdf - # - plots/accuracy_vs_features.pdf - # - plots/accuracy_vs_samples.pdf - - plots/confidence_vs_attack_parameters.pdf - - plots/train_time_vs_attack_parameters.pdf - # - plots/train_time_vs_features.pdf - # - plots/train_time_vs_samples.pdf - - plots/retrain_accuracy.pdf - - plots/retrain_confidence_vs_attack_parameters.pdf - - plots/retrain_time.pdf + - plots/accuracy_vs_attack_parameters.eps + 
# - plots/accuracy_vs_features.eps + # - plots/accuracy_vs_samples.eps + - plots/confidence_vs_attack_parameters.eps + - plots/train_time_vs_attack_parameters.eps + # - plots/train_time_vs_features.eps + # - plots/train_time_vs_samples.eps + - plots/retrain_accuracy.eps + - plots/retrain_confidence_vs_attack_parameters.eps + - plots/retrain_time.eps + move_files: + cmd: >- + cp -r ./plots/* ~/KDD-Paper-EAI-AISEC/truthseeker/ && rm ~/KDD-Paper-EAI-AISEC/truthseeker/.gitignore diff --git a/examples/security/truthseeker/plots.py b/examples/security/truthseeker/plots.py index c5ae8ac3..b5499185 100644 --- a/examples/security/truthseeker/plots.py +++ b/examples/security/truthseeker/plots.py @@ -2,7 +2,6 @@ import seaborn as sns from pathlib import Path import matplotlib.pyplot as plt - import logging sns.set_style("whitegrid") @@ -19,28 +18,16 @@ # else: # results = parse_results("reports/model_queue/") results = pd.read_csv("output/train.csv") -# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"] -results["Kernel"] = results["model.init.kwargs.kernel"].copy() -# results["Features"] = results["data.generate.kwargs.n_features"].copy() -results["Samples"] = results["data.sample.train_size"].copy() -# results["input_size"] = input_size -# sample_list = results["data.generate.kwargs.n_samples"].unique() -# feature_list = results["data.generate.kwargs.n_features"].unique() -kernel_list = results["model.init.kwargs.kernel"].unique() +results["Kernel"] = results["model.init.kernel"].copy() if "Unnamed: 0" in results.columns: del results["Unnamed: 0"] for col in results.columns: if col == "data.name" and isinstance(results[col][0], list): results[col] = results[col].apply(lambda x: x[0]) -results = results[results["model.init.kwargs.kernel"] != "sigmoid"] +results = results[results["model.init.kernel"] != "sigmoid"] attack_results = pd.read_csv("output/attack.csv") -attack_results["Kernel"] = 
attack_results["model.init.kwargs.kernel"].copy() -# attack_results["Features"] = attack_results["data.generate.kwargs.n_features"].copy() -# attack_results["Samples"] = attack_results["data.sample.train_size"].copy() -# sample_list = attack_results["data.generate.kwargs.n_samples"].unique() -# feature_list = attack_results["data.generate.kwargs.n_features"].unique() -kernel_list = attack_results["model.init.kwargs.kernel"].unique() +attack_results["Kernel"] = attack_results["model.init.kernel"].copy() if "Unnamed: 0" in attack_results.columns: del attack_results["Unnamed: 0"] for col in attack_results.columns: @@ -48,75 +35,26 @@ attack_results[col] = attack_results[col].apply(lambda x: x[0]) -# graph1 = sns.lineplot( -# x="data.sample.train_size", -# y="accuracy", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph1.legend(labels=["Linear", "RBF", "Poly"]) -# graph1.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") -# graph1.set_xlabel("Number of Samples") -# graph1.set_ylabel("Accuracy") -# graph1.set_xscale("log") -# graph1.get_figure().tight_layout() -# graph1.get_figure().savefig("plots/accuracy_vs_samples.pdf") -# plt.gcf().clear() - -# graph2 = sns.lineplot( -# x="data.generate.kwargs.n_features", -# y="accuracy", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph2.set_xlabel("Number of Features") -# graph2.set_ylabel("Accuracy") -# graph2.set_xscale("log") -# graph2.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") -# graph2.get_figure().tight_layout() -# graph2.get_figure().savefig("plots/accuracy_vs_features.pdf") -# plt.gcf().clear() - -# results["train_time"] = ( -# results["train_time"] -# * results["data.sample.train_size"] -# * results["data.generate.kwargs.n_samples"] -# ) -# graph3 = sns.lineplot( -# x="data.generate.kwargs.n_features", -# y="train_time", -# data=results, -# style="Kernel", -# style_order=["rbf", 
"poly", "linear"], -# ) -# graph3.set_xlabel("Number of Features") -# graph3.set_ylabel("Training Time") -# graph3.set(yscale="log", xscale="log") -# graph3.legend(title="Kernel") -# graph3.get_figure().tight_layout() -# graph3.get_figure().savefig("plots/train_time_vs_features.pdf") -# plt.gcf().clear() - -# graph4 = sns.lineplot( -# x="data.sample.train_size", -# y="train_time", -# data=results, -# style="Kernel", -# style_order=["rbf", "poly", "linear"], -# ) -# graph4.set_xlabel("Number of Samples") -# graph4.set_ylabel("Training Time") -# graph4.set(yscale="log", xscale="log") -# graph4.legend(title="Kernel") -# graph4.get_figure().tight_layout() -# graph4.get_figure().savefig("plots/train_time_vs_samples.eps") -# plt.gcf().clear() +graph4 = sns.lineplot( + x="data.sample.train_size", + y="train_time", + data=results, + style="Kernel", + style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), +) +graph4.set_xlabel("Number of Samples") +graph4.set_ylabel("Training Time") +graph4.set(yscale="log", xscale="log", xlim=(10, 1e6)) +graph4.legend(title="Kernel") +graph4.get_figure().tight_layout() +graph4.get_figure().savefig("plots/train_time_vs_samples.eps") +plt.gcf().clear() fig, ax = plt.subplots(2, 2) graph5 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="accuracy", data=attack_results, style="Kernel", @@ -124,20 +62,24 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph5.set(xscale="log", xlabel="Perturbation Distance", ylabel="Accuracy") graph6 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="accuracy", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph6.set(xscale="log", xlabel="Perturbation Step", ylabel="Accuracy") graph7 = sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", 
y="accuracy", data=attack_results, style="Kernel", @@ -145,10 +87,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph7.set(xscale="log", xlabel="Maximum Iterations", ylabel="Accuracy") graph8 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", y="accuracy", data=attack_results, style="Kernel", @@ -156,16 +100,18 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph8.set(xscale="log", xlabel="Batch Size", ylabel="Accuracy") graph6.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout() -fig.savefig("plots/accuracy_vs_attack_parameters.pdf") +fig.savefig("plots/accuracy_vs_attack_parameters.eps") plt.gcf().clear() fig, ax = plt.subplots(2, 2) graph9 = sns.lineplot( - x="attack.init.kwargs.eps", + x="attack.init.eps", y="adv_fit_time", data=attack_results, style="Kernel", @@ -173,20 +119,24 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="Attack Time") graph10 = sns.lineplot( - x="attack.init.kwargs.eps_step", + x="attack.init.eps_step", y="adv_fit_time", data=attack_results, style="Kernel", ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="Attack Time") graph11 = sns.lineplot( - x="attack.init.kwargs.max_iter", + x="attack.init.max_iter", y="adv_fit_time", data=attack_results, style="Kernel", @@ -194,10 +144,12 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="Attack Time") graph12 = sns.lineplot( - x="attack.init.kwargs.batch_size", + x="attack.init.batch_size", 
y="adv_fit_time", data=attack_results, style="Kernel", @@ -205,11 +157,13 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="Attack Time") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/train_time_vs_attack_parameters.pdf") +fig.savefig("plots/train_time_vs_attack_parameters.eps") plt.gcf().clear() retrain_df = pd.DataFrame() @@ -232,6 +186,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -241,12 +197,14 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") retrain.set_ylabel("Accuracy") retrain.get_figure().tight_layout() -retrain.get_figure().savefig("plots/retrain_accuracy.pdf") +retrain.get_figure().savefig("plots/retrain_accuracy.eps") plt.gcf().clear() retrain_df["ben_time"] = retrain_df["ben_time"] * retrain_df["train_size"] * 10 @@ -257,6 +215,8 @@ data=retrain_df, style="Kernel", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain = sns.lineplot( x="Epochs", @@ -266,13 +226,15 @@ color="darkred", legend=False, style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) retrain.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") retrain.set_xlabel("Retraining Epochs") retrain.set_ylabel("Time") retrain.set_yscale("log") retrain.get_figure().tight_layout() -retrain.get_figure().savefig("plots/retrain_time.pdf") +retrain.get_figure().savefig("plots/retrain_time.eps") plt.gcf().clear() confidence_df = pd.read_csv("plots/before_retrain_confidence.csv") @@ -286,6 +248,8 @@ 
legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -296,6 +260,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -307,6 +273,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -318,11 +286,13 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/confidence_vs_attack_parameters.pdf") +fig.savefig("plots/confidence_vs_attack_parameters.eps") plt.gcf().clear() confdence_df = pd.read_csv("plots/after_retrain_confidence.csv") @@ -337,6 +307,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph9.set(xscale="log", xlabel="Perturbation Distance", ylabel="False Confidence") graph10 = sns.lineplot( @@ -347,6 +319,8 @@ ax=ax[0, 1], color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph10.set(xscale="log", xlabel="Perturbation Step", ylabel="False Confidence") graph11 = sns.lineplot( @@ -358,6 +332,8 @@ legend=False, color="darkred", style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph11.set(xscale="log", xlabel="Maximum Iterations", ylabel="False Confidence") graph12 = sns.lineplot( @@ -369,9 +345,11 @@ legend=False, color="darkred", 
style_order=["rbf", "poly", "linear"], + err_style="bars", + errorbar=("ci", 99), ) graph12.set(xscale="log", xlabel="Batch Size", ylabel="False Confidence") graph10.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1, title="Kernel") fig.tight_layout(h_pad=0.5) -fig.savefig("plots/retrain_confidence_vs_attack_parameters.pdf") +fig.savefig("plots/retrain_confidence_vs_attack_parameters.eps") plt.gcf().clear() diff --git a/examples/security/truthseeker/plots/.gitignore b/examples/security/truthseeker/plots/.gitignore index dd345776..f09089fa 100644 --- a/examples/security/truthseeker/plots/.gitignore +++ b/examples/security/truthseeker/plots/.gitignore @@ -1,6 +1,6 @@ -/accuracy_vs_attack_parameters.pdf -/confidence_vs_attack_parameters.pdf -/train_time_vs_attack_parameters.pdf -/retrain_accuracy.pdf -/retrain_confidence_vs_attack_parameters.pdf -/retrain_time.pdf +/accuracy_vs_attack_parameters.eps +/confidence_vs_attack_parameters.eps +/train_time_vs_attack_parameters.eps +/retrain_accuracy.eps +/retrain_confidence_vs_attack_parameters.eps +/retrain_time.eps diff --git a/examples/security/truthseeker/plots/train_time_vs_samples.eps b/examples/security/truthseeker/plots/train_time_vs_samples.eps new file mode 100644 index 00000000..0d282c40 --- /dev/null +++ b/examples/security/truthseeker/plots/train_time_vs_samples.eps @@ -0,0 +1,1373 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Title: train_time_vs_samples.eps +%%Creator: Matplotlib v3.7.2, https://matplotlib.org/ +%%CreationDate: Tue Jul 16 15:31:57 2024 +%%Orientation: portrait +%%BoundingBox: 75 223 537 569 +%%HiResBoundingBox: 75.600000 223.200000 536.400000 568.800000 +%%EndComments +%%BeginProlog +/mpldict 11 dict def +mpldict begin +/_d { bind def } bind def +/m { moveto } _d +/l { lineto } _d +/r { rlineto } _d +/c { curveto } _d +/cl { closepath } _d +/ce { closepath eofill } _d +/box { + m + 1 index 0 r + 0 exch r + neg 0 r + cl + } _d +/clipbox { + box + clip + newpath + } _d +/sc { setcachedevice } _d 
+%!PS-Adobe-3.0 Resource-Font +%%Creator: Converted from TrueType to Type 3 by Matplotlib. +10 dict begin +/FontName /DejaVuSerif def +/PaintType 0 def +/FontMatrix [0.00048828125 0 0 0.00048828125 0 0] def +/FontBBox [-1576 -710 4312 2272] def +/FontType 3 def +/Encoding [/minus /space /period /zero /one /two /three /four /five /six /eight /K /N /S /T /multiply /a /b /e /f /g /i /l /m /n /o /p /r /s /u /y] def +/CharStrings 32 dict dup begin +/.notdef 0 def +/minus{1716 0 217 561 1499 723 sc +217 723 m +1499 723 l +1499 561 l +217 561 l +217 723 l + +ce} _d +/space{651 0 0 0 0 0 sc +ce} _d +/period{651 0 193 -29 459 238 sc +193 104 m +193 141 206 173 231 199 c +256 225 288 238 326 238 c +363 238 394 225 420 199 c +446 173 459 141 459 104 c +459 67 446 36 420 10 c +394 -16 363 -29 326 -29 c +288 -29 256 -16 231 9 c +206 35 193 67 193 104 c + +ce} _d +/zero{1303 0 135 -29 1167 1520 sc +651 70 m +753 70 829 126 880 238 c +931 350 956 519 956 745 c +956 972 931 1141 880 1253 c +829 1365 753 1421 651 1421 c +549 1421 473 1365 422 1253 c +371 1141 346 972 346 745 c +346 519 371 350 422 238 c +473 126 549 70 651 70 c + +651 -29 m +489 -29 362 39 271 175 c +180 311 135 501 135 745 c +135 990 180 1180 271 1316 c +362 1452 489 1520 651 1520 c +814 1520 940 1452 1031 1316 c +1122 1180 1167 990 1167 745 c +1167 501 1122 311 1031 175 c +940 39 814 -29 651 -29 c + +ce} _d +/one{1303 0 250 0 1012 1520 sc +291 0 m +291 106 l +551 106 l +551 1348 l +250 1153 l +250 1284 l +614 1520 l +752 1520 l +752 106 l +1012 106 l +1012 0 l +291 0 l + +ce} _d +/two{1303 0 139 0 1102 1520 sc +262 1137 m +150 1137 l +150 1403 l +221 1441 293 1470 365 1490 c +438 1510 509 1520 578 1520 c +733 1520 856 1482 946 1407 c +1036 1332 1081 1229 1081 1100 c +1081 954 979 779 775 576 c +759 561 747 549 739 541 c +362 164 l +985 164 l +985 348 l +1102 348 l +1102 0 l +139 0 l +139 109 l +592 561 l +692 661 763 753 806 836 c +849 920 870 1008 870 1100 c +870 1201 844 1279 791 1336 c +739 1393 667 1421 575 
1421 c +480 1421 406 1397 354 1350 c +302 1303 271 1232 262 1137 c + +ce} _d +/three{1303 0 156 -29 1151 1520 sc +199 1430 m +277 1459 352 1482 423 1497 c +495 1512 562 1520 625 1520 c +771 1520 885 1488 967 1425 c +1049 1362 1090 1275 1090 1163 c +1090 1073 1062 998 1005 937 c +948 877 868 836 764 815 c +887 798 982 753 1049 681 c +1117 610 1151 517 1151 403 c +1151 264 1104 157 1010 82 c +917 8 782 -29 606 -29 c +528 -29 452 -21 377 -4 c +303 13 229 38 156 72 c +156 362 l +268 362 l +275 266 307 193 365 144 c +423 95 505 70 610 70 c +712 70 792 99 851 158 c +910 217 940 298 940 401 c +940 518 910 607 849 667 c +788 728 699 758 582 758 c +487 758 l +487 860 l +537 860 l +654 860 741 884 799 932 c +858 981 887 1054 887 1151 c +887 1238 863 1305 815 1351 c +767 1398 698 1421 608 1421 c +518 1421 448 1400 398 1357 c +349 1314 320 1251 311 1167 c +199 1167 l +199 1430 l + +ce} _d +/four{1303 0 63 0 1200 1520 sc +715 506 m +715 1300 l +205 506 l +715 506 l + +1155 0 m +475 0 l +475 106 l +715 106 l +715 399 l +63 399 l +63 508 l +717 1520 l +915 1520 l +915 506 l +1200 506 l +1200 399 l +915 399 l +915 106 l +1155 106 l +1155 0 l + +ce} _d +/five{1303 0 174 -29 1145 1493 sc +1030 1493 m +1030 1329 l +346 1329 l +346 901 l +381 925 421 943 467 955 c +514 967 566 973 623 973 c +784 973 912 928 1005 839 c +1098 750 1145 628 1145 473 c +1145 315 1098 192 1003 103 c +909 15 777 -29 606 -29 c +537 -29 467 -21 395 -4 c +323 13 249 38 174 72 c +174 362 l +287 362 l +293 267 323 195 377 145 c +432 95 508 70 606 70 c +711 70 792 105 849 174 c +906 243 934 343 934 473 c +934 602 906 701 849 770 c +793 839 712 874 606 874 c +546 874 493 863 447 842 c +402 821 361 788 326 743 c +240 743 l +240 1493 l +1030 1493 l + +ce} _d +/six{1303 0 137 -29 1174 1520 sc +670 70 m +764 70 836 104 887 173 c +938 242 963 342 963 471 c +963 600 938 699 887 768 c +836 837 764 872 670 872 c +575 872 502 839 452 772 c +402 705 377 609 377 483 c +377 350 402 248 453 177 c +504 106 576 70 670 70 c + +344 
822 m +389 872 441 909 498 934 c +555 959 620 971 692 971 c +841 971 958 926 1044 837 c +1131 748 1174 626 1174 471 c +1174 320 1127 198 1034 107 c +941 16 817 -29 662 -29 c +493 -29 364 34 273 159 c +182 285 137 465 137 698 c +137 959 191 1162 298 1305 c +405 1448 557 1520 752 1520 c +805 1520 860 1515 918 1505 c +976 1495 1035 1480 1096 1460 c +1096 1214 l +983 1214 l +975 1281 949 1333 906 1368 c +863 1403 804 1421 731 1421 c +602 1421 505 1372 442 1274 c +379 1176 346 1025 344 822 c + +ce} _d +/eight{1303 0 137 -29 1165 1520 sc +954 408 m +954 515 927 597 874 656 c +821 715 747 745 651 745 c +555 745 480 715 427 656 c +374 597 348 515 348 408 c +348 301 374 217 427 158 c +480 99 555 70 651 70 c +747 70 821 99 874 158 c +927 217 954 301 954 408 c + +913 1133 m +913 1224 890 1294 844 1345 c +798 1396 734 1421 651 1421 c +569 1421 505 1396 458 1345 c +412 1294 389 1224 389 1133 c +389 1042 412 971 458 920 c +505 869 569 844 651 844 c +734 844 798 869 844 920 c +890 971 913 1042 913 1133 c + +805 795 m +918 780 1007 738 1070 669 c +1133 601 1165 514 1165 408 c +1165 268 1121 160 1032 84 c +943 9 816 -29 651 -29 c +486 -29 359 9 270 84 c +181 160 137 268 137 408 c +137 514 169 601 232 669 c +295 738 384 780 498 795 c +397 813 320 851 266 909 c +213 968 186 1042 186 1133 c +186 1253 227 1347 310 1416 c +393 1485 506 1520 651 1520 c +796 1520 909 1485 992 1416 c +1075 1347 1116 1253 1116 1133 c +1116 1042 1089 968 1035 909 c +982 851 905 813 805 795 c + +ce} _d +/K{1530 0 113 0 1561 1493 sc +113 0 m +113 106 l +303 106 l +303 1386 l +113 1386 l +113 1493 l +696 1493 l +696 1386 l +506 1386 l +506 821 l +1149 1386 l +987 1386 l +987 1493 l +1483 1493 l +1483 1386 l +1315 1386 l +674 823 l +1391 106 l +1561 106 l +1561 0 l +1214 0 l +506 709 l +506 106 l +696 106 l +696 0 l +113 0 l + +ce} _d +/N{1792 0 100 -29 1702 1493 sc +100 0 m +100 106 l +301 106 l +301 1386 l +100 1386 l +100 1493 l +483 1493 l +1378 315 l +1378 1386 l +1178 1386 l +1178 1493 l +1702 1493 l +1702 
1386 l +1501 1386 l +1501 -29 l +1380 -29 l +424 1229 l +424 106 l +625 106 l +625 0 l +100 0 l + +ce} _d +/S{1403 0 172 -29 1253 1520 sc +190 72 m +190 412 l +305 411 l +308 298 341 214 403 159 c +466 105 561 78 688 78 c +807 78 897 101 959 148 c +1022 195 1053 264 1053 354 c +1053 426 1034 481 996 520 c +959 559 879 596 758 633 c +561 692 l +418 735 318 789 259 854 c +201 919 172 1007 172 1120 c +172 1247 217 1345 307 1415 c +397 1485 523 1520 686 1520 c +755 1520 831 1512 914 1497 c +997 1482 1085 1461 1178 1432 c +1178 1114 l +1065 1114 l +1054 1219 1018 1295 959 1342 c +900 1389 811 1413 690 1413 c +585 1413 504 1391 449 1348 c +394 1305 367 1243 367 1161 c +367 1090 388 1034 429 993 c +470 952 558 912 692 872 c +877 817 l +1012 776 1109 724 1166 661 c +1224 598 1253 514 1253 408 c +1253 263 1207 154 1114 81 c +1021 8 883 -29 700 -29 c +618 -29 534 -21 449 -4 c +364 13 278 38 190 72 c + +ce} _d +/T{1366 0 20 0 1346 1493 sc +391 0 m +391 106 l +582 106 l +582 1374 l +143 1374 l +143 1141 l +20 1141 l +20 1493 l +1346 1493 l +1346 1141 l +1223 1141 l +1223 1374 l +784 1374 l +784 106 l +975 106 l +975 0 l +391 0 l + +ce} _d +/multiply{1716 0 283 68 1434 1217 sc +1434 1104 m +971 641 l +1434 180 l +1319 68 l +858 528 l +397 68 l +283 180 l +743 641 l +283 1104 l +397 1217 l +858 756 l +1319 1217 l +1434 1104 l + +ce} _d +/a{1221 0 102 -29 1163 1092 sc +815 334 m +815 559 l +578 559 l +487 559 419 539 374 500 c +329 461 307 400 307 319 c +307 245 330 186 375 143 c +420 100 482 78 559 78 c +636 78 697 102 744 149 c +791 196 815 258 815 334 c + +999 664 m +999 106 l +1163 106 l +1163 0 l +815 0 l +815 115 l +774 66 727 29 674 6 c +621 -17 558 -29 487 -29 c +369 -29 275 2 206 65 c +137 128 102 212 102 319 c +102 429 142 514 221 575 c +300 636 412 666 557 666 c +815 666 l +815 739 l +815 820 790 882 741 926 c +692 971 624 993 535 993 c +462 993 403 976 360 943 c +317 910 290 860 279 795 c +184 795 l +184 1010 l +248 1037 310 1058 370 1071 c +431 1085 490 1092 547 1092 
c +694 1092 806 1055 883 982 c +960 909 999 803 999 664 c + +ce} _d +/b{1311 0 59 -29 1208 1556 sc +236 106 m +236 1450 l +59 1450 l +59 1556 l +420 1556 l +420 897 l +456 964 502 1013 557 1044 c +613 1076 682 1092 764 1092 c +895 1092 1001 1040 1084 937 c +1167 834 1208 699 1208 532 c +1208 365 1167 230 1084 126 c +1001 23 895 -29 764 -29 c +682 -29 613 -13 557 18 c +502 50 456 99 420 166 c +420 0 l +59 0 l +59 106 l +236 106 l + +420 479 m +420 351 444 253 493 186 c +542 119 614 86 707 86 c +801 86 872 124 920 199 c +969 274 993 385 993 532 c +993 679 969 790 920 865 c +872 940 801 977 707 977 c +614 977 542 943 493 876 c +444 809 420 711 420 584 c +420 479 l + +ce} _d +/e{1212 0 102 -29 1110 1092 sc +1110 512 m +317 512 l +317 504 l +317 361 344 252 398 179 c +452 106 532 70 637 70 c +718 70 784 91 835 133 c +887 176 923 239 944 322 c +1092 322 l +1063 205 1008 118 929 59 c +850 0 747 -29 618 -29 c +463 -29 338 22 243 124 c +149 227 102 363 102 532 c +102 700 148 835 241 938 c +334 1041 455 1092 606 1092 c +767 1092 890 1042 976 943 c +1062 844 1107 701 1110 512 c + +893 618 m +889 742 863 835 814 898 c +766 961 697 993 606 993 c +521 993 455 961 406 898 c +357 835 328 741 317 618 c +893 618 l + +ce} _d +/f{758 0 74 0 881 1556 sc +881 1305 m +784 1305 l +783 1355 769 1393 741 1419 c +714 1445 674 1458 621 1458 c +552 1458 504 1439 476 1401 c +448 1364 434 1297 434 1200 c +434 1063 l +731 1063 l +731 956 l +434 956 l +434 106 l +670 106 l +670 0 l +74 0 l +74 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +250 1063 l +250 1196 l +250 1315 281 1404 342 1465 c +404 1526 495 1556 614 1556 c +659 1556 703 1552 748 1544 c +793 1536 837 1524 881 1507 c +881 1305 l + +ce} _d +/g{1311 0 102 -455 1251 1092 sc +1075 956 m +1075 23 l +1075 -130 1033 -247 949 -330 c +865 -413 745 -455 590 -455 c +520 -455 453 -449 389 -436 c +325 -423 264 -404 205 -379 c +205 -156 l +301 -156 l +313 -225 341 -276 386 -308 c +431 -340 495 -356 578 -356 c +686 -356 765 -325 815 -264 c +866 
-203 891 -108 891 23 c +891 166 l +855 99 809 50 753 18 c +698 -13 629 -29 547 -29 c +416 -29 309 23 226 126 c +143 230 102 365 102 532 c +102 699 143 834 226 937 c +309 1040 416 1092 547 1092 c +629 1092 698 1076 753 1044 c +809 1013 855 964 891 897 c +891 1063 l +1251 1063 l +1251 956 l +1075 956 l + +891 584 m +891 711 866 809 817 876 c +768 943 697 977 604 977 c +509 977 438 940 389 865 c +341 790 317 679 317 532 c +317 385 341 274 389 199 c +438 124 509 86 604 86 c +697 86 768 119 817 186 c +866 253 891 351 891 479 c +891 584 l + +ce} _d +/i{655 0 74 0 608 1507 sc +199 1393 m +199 1424 210 1450 232 1473 c +255 1496 282 1507 313 1507 c +344 1507 370 1496 392 1473 c +415 1450 426 1424 426 1393 c +426 1362 415 1335 393 1313 c +371 1291 344 1280 313 1280 c +282 1280 255 1291 232 1313 c +210 1335 199 1362 199 1393 c + +434 106 m +608 106 l +608 0 l +74 0 l +74 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +434 1063 l +434 106 l + +ce} _d +/l{655 0 59 0 594 1556 sc +420 106 m +594 106 l +594 0 l +59 0 l +59 106 l +236 106 l +236 1450 l +59 1450 l +59 1556 l +420 1556 l +420 106 l + +ce} _d +/m{1942 0 74 0 1886 1092 sc +1061 856 m +1096 934 1141 993 1196 1032 c +1251 1072 1316 1092 1389 1092 c +1500 1092 1583 1057 1638 988 c +1693 919 1720 815 1720 676 c +1720 106 l +1886 106 l +1886 0 l +1376 0 l +1376 106 l +1536 106 l +1536 655 l +1536 764 1520 841 1488 887 c +1456 933 1403 956 1329 956 c +1247 956 1184 925 1141 863 c +1098 801 1077 711 1077 592 c +1077 106 l +1237 106 l +1237 0 l +733 0 l +733 106 l +893 106 l +893 662 l +893 768 877 843 845 888 c +813 933 760 956 686 956 c +604 956 541 925 498 863 c +455 801 434 711 434 592 c +434 106 l +594 106 l +594 0 l +84 0 l +84 106 l +250 106 l +250 958 l +74 958 l +74 1063 l +434 1063 l +434 874 l +468 945 511 999 564 1036 c +617 1073 676 1092 743 1092 c +826 1092 895 1071 950 1030 c +1005 989 1042 931 1061 856 c + +ce} _d +/n{1319 0 74 0 1262 1092 sc +84 0 m +84 106 l +250 106 l +250 956 l +74 956 l +74 1063 l +434 
1063 l +434 874 l +468 946 512 1000 566 1037 c +621 1074 684 1092 756 1092 c +873 1092 960 1058 1015 991 c +1070 924 1098 819 1098 676 c +1098 106 l +1262 106 l +1262 0 l +754 0 l +754 106 l +913 106 l +913 618 l +913 748 897 837 865 885 c +833 934 777 958 696 958 c +611 958 546 927 501 864 c +456 802 434 711 434 592 c +434 106 l +594 106 l +594 0 l +84 0 l + +ce} _d +/o{1233 0 102 -29 1130 1092 sc +616 70 m +715 70 789 109 839 187 c +890 265 915 380 915 532 c +915 684 890 799 839 876 c +789 954 715 993 616 993 c +517 993 443 954 392 876 c +342 799 317 684 317 532 c +317 380 342 265 393 187 c +444 109 518 70 616 70 c + +616 -29 m +461 -29 337 22 243 124 c +149 227 102 363 102 532 c +102 701 149 837 242 939 c +336 1041 461 1092 616 1092 c +771 1092 896 1041 989 939 c +1083 837 1130 701 1130 532 c +1130 363 1083 227 989 124 c +896 22 771 -29 616 -29 c + +ce} _d +/p{1311 0 59 -426 1208 1092 sc +420 584 m +420 479 l +420 351 444 253 493 186 c +542 119 614 86 707 86 c +801 86 872 124 920 199 c +969 274 993 385 993 532 c +993 679 969 790 920 865 c +872 940 801 977 707 977 c +614 977 542 943 493 876 c +444 809 420 711 420 584 c + +236 956 m +59 956 l +59 1063 l +420 1063 l +420 897 l +456 964 502 1013 557 1044 c +613 1076 682 1092 764 1092 c +895 1092 1001 1040 1084 937 c +1167 834 1208 699 1208 532 c +1208 365 1167 230 1084 126 c +1001 23 895 -29 764 -29 c +682 -29 613 -13 557 18 c +502 50 456 99 420 166 c +420 -319 l +594 -319 l +594 -426 l +59 -426 l +59 -319 l +236 -319 l +236 956 l + +ce} _d +/r{979 0 74 0 979 1092 sc +979 1065 m +979 799 l +873 799 l +870 852 855 891 829 917 c +803 943 765 956 715 956 c +624 956 555 925 506 862 c +458 799 434 709 434 592 c +434 106 l +647 106 l +647 0 l +84 0 l +84 106 l +250 106 l +250 958 l +74 958 l +74 1063 l +434 1063 l +434 874 l +470 948 516 1003 573 1038 c +630 1074 699 1092 780 1092 c +810 1092 841 1090 874 1085 c +907 1080 942 1074 979 1065 c + +ce} _d +/s{1051 0 115 -29 946 1092 sc +115 59 m +115 307 l +221 307 l +224 228 
248 168 295 129 c +342 90 412 70 504 70 c +587 70 650 85 693 116 c +736 147 758 193 758 252 c +758 299 742 336 710 365 c +679 394 612 424 510 457 c +377 502 l +286 531 219 568 178 612 c +137 656 117 712 117 780 c +117 877 153 954 224 1009 c +295 1064 394 1092 520 1092 c +576 1092 635 1085 697 1070 c +759 1055 823 1034 889 1006 c +889 774 l +783 774 l +780 843 756 896 711 935 c +666 974 604 993 526 993 c +449 993 390 979 350 952 c +311 925 291 884 291 829 c +291 784 306 748 336 721 c +366 694 426 667 516 639 c +662 594 l +763 563 835 523 879 476 c +924 429 946 369 946 295 c +946 194 907 115 830 57 c +753 -0 647 -29 512 -29 c +443 -29 376 -22 311 -7 c +246 8 180 30 115 59 c + +ce} _d +/u{1319 0 55 -29 1243 1063 sc +725 1063 m +1069 1063 l +1069 106 l +1243 106 l +1243 0 l +885 0 l +885 188 l +851 117 807 63 753 26 c +699 -11 636 -29 565 -29 c +447 -29 360 4 304 71 c +249 138 221 244 221 387 c +221 956 l +55 956 l +55 1063 l +406 1063 l +406 444 l +406 315 422 226 453 178 c +485 130 542 106 623 106 c +708 106 773 137 818 200 c +863 263 885 354 885 473 c +885 956 l +725 956 l +725 1063 l + +ce} _d +/y{1157 0 -6 -455 1151 1063 sc +442 -195 m +512 -18 l +115 956 l +-6 956 l +-6 1063 l +483 1063 l +483 956 l +313 956 l +612 225 l +911 956 l +752 956 l +752 1063 l +1151 1063 l +1151 956 l +1032 956 l +545 -240 l +512 -323 475 -379 434 -409 c +393 -440 336 -455 262 -455 c +231 -455 198 -452 165 -447 c +132 -442 99 -434 66 -424 c +66 -221 l +160 -221 l +164 -266 175 -299 194 -318 c +213 -338 243 -348 283 -348 c +320 -348 349 -338 371 -317 c +394 -297 417 -256 442 -195 c + +ce} _d +end readonly def + +/BuildGlyph { + exch begin + CharStrings exch + 2 copy known not {pop /.notdef} if + true 3 1 roll get exec + end +} _d + +/BuildChar { + 1 index /Encoding get exch get + 1 index /BuildGlyph get exec +} _d + +FontName currentdict end definefont pop +end +%%EndProlog +mpldict begin +75.6 223.2 translate +460.8 345.6 0 0 clipbox +gsave +0 0 m +460.8 0 l +460.8 345.6 l +0 345.6 l 
+cl +1.000 setgray +fill +grestore +gsave +108.312798 56.796 m +433.392671 56.796 l +433.392671 330.048 l +108.312798 330.048 l +cl +1.000 setgray +fill +grestore +0.800 setlinewidth +1 setlinejoin +1 setlinecap +[] 0 setdash +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +108.312798 56.796 m +108.312798 330.048 l +stroke +grestore +0.150 setgray +gsave +96.3128 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/one glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +173.328772 56.796 m +173.328772 330.048 l +stroke +grestore +0.150 setgray +gsave +161.329 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/two glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +238.344747 56.796 m +238.344747 330.048 l +stroke +grestore +0.150 setgray +gsave +226.345 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/three glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +303.360722 56.796 m +303.360722 330.048 l +stroke +grestore +0.150 setgray +gsave +291.361 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/four glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +368.376696 56.796 m +368.376696 330.048 l +stroke +grestore +0.150 setgray +gsave +356.377 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.635938 
moveto +/one glyphshow +8.43005 0.635938 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.36094 moveto +/five glyphshow +grestore +0.800 setgray +gsave +325.08 273.252 108.313 56.796 clipbox +433.392671 56.796 m +433.392671 330.048 l +stroke +grestore +0.150 setgray +gsave +421.393 38.1991 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/one glyphshow +8.43005 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +16.9782 5.24062 moveto +/six glyphshow +grestore +/DejaVuSerif 14.400 selectfont +gsave + +198.384 20.8554 translate +0 rotate +0 0 m /N glyphshow +12.5781 0 m /u glyphshow +21.8362 0 m /m glyphshow +35.4672 0 m /b glyphshow +44.6692 0 m /e glyphshow +53.1763 0 m /r glyphshow +60.0479 0 m /space glyphshow +64.6173 0 m /o glyphshow +73.2718 0 m /f glyphshow +78.5922 0 m /space glyphshow +83.1616 0 m /S glyphshow +93.0093 0 m /a glyphshow +101.58 0 m /m glyphshow +115.211 0 m /p glyphshow +124.413 0 m /l glyphshow +129.01 0 m /e glyphshow +137.517 0 m /s glyphshow +grestore +gsave +33.7128 61.3028 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.2668 0.515625 moveto +/two glyphshow +23.2783 0.515625 moveto +/multiply glyphshow +36.9618 0.515625 moveto +/one glyphshow +45.3918 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.94 5.24062 moveto +/minus glyphshow +61.7115 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 111.767 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/four glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 158.19 
translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/six glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +gsave +33.7128 201.171 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/two glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.1418 0.515625 moveto +/eight glyphshow +23.1533 0.515625 moveto +/multiply glyphshow +36.8368 0.515625 moveto +/one glyphshow +45.2668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.815 5.24062 moveto +/minus glyphshow +61.5865 5.24062 moveto +/two glyphshow +grestore +gsave +45.7128 241.186 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +11.0115 0.515625 moveto +/multiply glyphshow +24.6949 0.515625 moveto +/one glyphshow +33.125 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +41.6732 5.24062 moveto +/minus glyphshow +49.4446 5.24062 moveto +/two glyphshow +grestore +gsave +33.7128 278.616 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.2668 0.515625 moveto +/two glyphshow +23.2783 0.515625 moveto +/multiply glyphshow +36.9618 0.515625 moveto +/one glyphshow +45.3918 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +53.94 5.24062 moveto +/minus glyphshow +61.7115 5.24062 moveto +/two glyphshow +grestore +gsave +32.7128 313.777 translate +0 rotate +/DejaVuSerif 13.200000000000001 selectfont +0 0.515625 moveto +/three glyphshow +8.43005 0.515625 moveto +/period glyphshow +12.6418 0.515625 moveto +/four glyphshow +23.6533 0.515625 moveto +/multiply glyphshow +37.3368 
0.515625 moveto +/one glyphshow +45.7668 0.515625 moveto +/zero glyphshow +/DejaVuSerif 9.24 selectfont +54.315 5.24062 moveto +/minus glyphshow +62.0865 5.24062 moveto +/two glyphshow +grestore +/DejaVuSerif 14.400 selectfont +gsave + +25.6034 141.984 translate +90 rotate +0 0 m /T glyphshow +9.58801 0 m /r glyphshow +16.4597 0 m /a glyphshow +25.0299 0 m /i glyphshow +29.6274 0 m /n glyphshow +38.8855 0 m /i glyphshow +43.483 0 m /n glyphshow +52.7411 0 m /g glyphshow +61.9431 0 m /space glyphshow +66.5125 0 m /T glyphshow +76.1005 0 m /i glyphshow +80.6979 0 m /m glyphshow +94.3289 0 m /e glyphshow +grestore +1.200 setlinewidth +0.122 0.467 0.706 setrgbcolor +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 308.124776 m +stroke +grestore +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 299.902455 m +283.788963 317.627455 l +stroke +grestore +0 setlinecap +[4.8 1.8] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 141.977109 m +stroke +grestore +1 setlinecap +[] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 134.698126 m +283.788963 149.137791 l +stroke +grestore +0 setlinecap +[1.2 1.2] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 81.229542 m +stroke +grestore +1 setlinecap +[] 0 setdash +gsave +325.08 273.252 108.313 56.796 clipbox +283.788963 69.216545 m +283.788963 92.851816 l +stroke +grestore +1.000 setlinewidth +0 setlinejoin +2 setlinecap +0.800 setgray +gsave +108.312798 56.796 m +108.312798 330.048 l +stroke +grestore +gsave +433.392671 56.796 m +433.392671 330.048 l +stroke +grestore +gsave +108.312798 56.796 m +433.392671 56.796 l +stroke +grestore +gsave +108.312798 330.048 m +433.392671 330.048 l +stroke +grestore +0.800 setlinewidth +0 setlinecap +gsave +342.834546 241.61925 m +424.152671 241.61925 l +425.912671 241.61925 426.792671 242.49925 426.792671 244.25925 c +426.792671 320.808 l +426.792671 322.568 425.912671 323.448 424.152671 323.448 c +342.834546 323.448 l 
+341.074546 323.448 340.194546 322.568 340.194546 320.808 c +340.194546 244.25925 l +340.194546 242.49925 341.074546 241.61925 342.834546 241.61925 c +cl +gsave +1.000 setgray +fill +grestore +stroke +grestore +0.150 setgray +/DejaVuSerif 14.400 selectfont +gsave + +359.431 307.527 translate +0 rotate +0 0 m /K glyphshow +10.3641 0 m /e glyphshow +18.8712 0 m /r glyphshow +25.7429 0 m /n glyphshow +35.001 0 m /e glyphshow +43.5081 0 m /l glyphshow +grestore +1.200 setlinewidth +1 setlinejoin +1 setlinecap +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 292.766125 m +358.674546 292.766125 l +371.874546 292.766125 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 288.146 translate +0 rotate +0 0 m /r glyphshow +6.33386 0 m /b glyphshow +14.8157 0 m /f glyphshow +grestore +0 setlinecap +[4.8 1.8] 0 setdash +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 273.588 m +358.674546 273.588 l +371.874546 273.588 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 268.968 translate +0 rotate +0 0 m /p glyphshow +8.48181 0 m /o glyphshow +16.459 0 m /l glyphshow +20.6967 0 m /y glyphshow +grestore +[1.2 1.2] 0 setdash +0.122 0.467 0.706 setrgbcolor +gsave +345.474546 254.222375 m +358.674546 254.222375 l +371.874546 254.222375 l +stroke +grestore +0.150 setgray +/DejaVuSerif 13.200 selectfont +gsave + +382.435 249.602 translate +0 rotate +0 0 m /l glyphshow +4.23767 0 m /i glyphshow +8.47534 0 m /n glyphshow +17.0089 0 m /e glyphshow +24.8502 0 m /a glyphshow +32.7498 0 m /r glyphshow +grestore + +end +showpage diff --git a/examples/security/truthseeker/retrain.py b/examples/security/truthseeker/retrain.py index 6b91b13c..4a0928a4 100644 --- a/examples/security/truthseeker/retrain.py +++ b/examples/security/truthseeker/retrain.py @@ -236,9 +236,9 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: # Parse Model Results results = pd.read_csv("output/train.csv") # Some convenient 
variable names -# input_size = results["data.generate.kwargs.n_samples"] * results["data.generate.kwargs.n_features"] -results["Kernel"] = results["model.init.kwargs.kernel"].copy() -# results["Features"] = results["data.generate.kwargs.n_features"].copy() +# input_size = results["data.generate.n_samples"] * results["data.generate.n_features"] +results["Kernel"] = results["model.init.kernel"].copy() +# results["Features"] = results["data.generate.n_features"].copy() # results["Samples"] = results["data.sample.train_size"].copy() # results["input_size"] = input_size # Clean up results @@ -249,7 +249,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: results[col] = results[col].apply(lambda x: x[0]) # Subset results # subset = results[results["data.sample.train_size"] == 10000] -# subset = subset[subset["data.generate.kwargs.n_features"] == 100] +# subset = subset[subset["data.generate.n_features"] == 100] with open("conf/model/best_rbf.yaml", "r") as f: best_rbf = yaml.safe_load(f) best_rbf["init"].pop("_target_", None) @@ -341,7 +341,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: params = json.load(f) else: raise ValueError(f"No params file found for {folder}") - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name confidence_ser["Average False Confidence"] = avg_prob @@ -429,7 +429,7 @@ def save_results_and_outputs(results, outputs, path="retrain") -> list: else: logger.warning(f"No params file found for {folder}") continue - attack_params = params["attack"]["init"]["kwargs"] + attack_params = params["attack"]["init"] attack_params.update({"name": params["attack"]["init"]["name"]}) confidence_ser["Kernel"] = name confidence_ser["Average False Confidence After Retraining"] = avg_prob