Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make proper submodules #183

Merged
merged 35 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
0cac8b3
better main.py, linting
simplymathematics Jul 6, 2024
af68900
remove tmp
simplymathematics Jul 6, 2024
d567a14
linting
simplymathematics Jul 6, 2024
e4ffd7c
better main, linting
simplymathematics Jul 13, 2024
d0622b2
removed old notebook
simplymathematics Jul 13, 2024
beac652
update params file for gzip
simplymathematics Jul 13, 2024
aa9c431
removed unused import
simplymathematics Jul 13, 2024
e9b76b9
removed unused import
simplymathematics Jul 13, 2024
d5a229f
added missing import
simplymathematics Jul 13, 2024
18194e4
rename main function
simplymathematics Jul 13, 2024
583e052
rename parser, main function
simplymathematics Jul 13, 2024
ce7ecdf
rename parser, main function
simplymathematics Jul 13, 2024
ce672cd
rename main function
simplymathematics Jul 13, 2024
febf205
remove old script
simplymathematics Jul 13, 2024
6fba4b5
rename parser, main function
simplymathematics Jul 13, 2024
9f69050
stop tracking params.yaml
simplymathematics Jul 19, 2024
29f10f9
update configs for final plots
simplymathematics Jul 19, 2024
8332c26
removed fixed random number seed
simplymathematics Jul 22, 2024
eaf4c5b
renamed functions for clarity
simplymathematics Jul 22, 2024
b712142
gzip conf changes
simplymathematics Jul 22, 2024
8566540
rename functions for main script
simplymathematics Jul 22, 2024
edfe15f
update layers for main file
simplymathematics Jul 22, 2024
e06dc96
update main script
simplymathematics Jul 22, 2024
97003e0
linting
simplymathematics Jul 22, 2024
6c9ca99
linting
simplymathematics Jul 22, 2024
3c82c93
update __all__
simplymathematics Jul 30, 2024
fa66df2
add support for columns missing in subsets
simplymathematics Jul 30, 2024
fc01a49
remove extra column in results csv
simplymathematics Jul 30, 2024
34cb192
remove item. from file config during matrix and foreach stages
simplymathematics Jul 30, 2024
2a67131
improved plotting
simplymathematics Jul 30, 2024
1358a71
get batched gzip classifier working
simplymathematics Jul 30, 2024
7115531
refactor gzip classifier
simplymathematics Jul 30, 2024
4de24a1
config updates for gzip
simplymathematics Jul 30, 2024
7b68187
liniting
simplymathematics Jul 30, 2024
76f77ee
linting
simplymathematics Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 66 additions & 92 deletions deckard/__main__.py
Original file line number Diff line number Diff line change
@@ -1,106 +1,80 @@
#!/usr/bin/env python3
import argparse
import subprocess
import sys
import logging
from pathlib import Path
from omegaconf import OmegaConf
from .layers.parse import save_params_file
from .layers.afr import afr_parser, afr_main
from .layers.attack import attack_parser, attack_main
from .layers.clean_data import clean_data_parser, clean_data_main
from .layers.compile import compile_parser, compile_main
from .layers.data import data_parser, data_main
from .layers.experiment import experiment_parser, experiment_main
from .layers.find_best import find_best_parser, find_best_main
from .layers.generate_grid import generate_grid_parser, generate_grid_main
from .layers.hydra_test import hydra_test_main
from .layers.merge import merge_parser, merge_main
from .layers.optimise import optimise_main
from .layers.parse import hydra_parser, parse_hydra_config
from .layers.plots import plots_parser, plots_main
from .layers.prepare_queue import prepare_queue_main
from .layers.query_kepler import kepler_parser, kepler_main

OmegaConf.register_new_resolver("eval", eval)

logger = logging.getLogger(__name__)
layer_list = list(Path(Path(__file__).parent, "layers").glob("*.py"))
layer_list = [layer.stem for layer in layer_list]
if "__init__" in layer_list:
layer_list.remove("__init__")
layer_list.append(None)
# Names of the layers that can be dispatched from the command line.
# The module-level assert below compares this list against
# ``deckard_layer_dict``, so every entry needs its own comma: two adjacent
# string literals are silently concatenated into a single (wrong) name.
layer_list = [
    "afr",
    "attack",
    "clean_data",
    "compile",
    "data",
    "experiment",
    "find_best",
    "generate_grid",
    "hydra_test",
    "merge",
    "optimise",
    "parse",
    "plots",
    "prepare_queue",
    "query_kepler",
]


def run_submodule(submodule, args):
    """Run ``python -m deckard.layers.<submodule>`` in a child process.

    The child's standard output is echoed line by line while it runs.

    Args:
        submodule: Name of the module under ``deckard.layers`` to execute.
        args: Extra command-line text appended to the command. Any empty
            value (``""`` or ``[]``) means "no extra arguments".

    Returns:
        0 if the child exited with status 0, otherwise 1 (after logging the
        child's standard error).
    """
    cmd = f"python -m deckard.layers.{submodule}"
    if args:
        cmd = f"{cmd} {args}"
    logger.info(f"Running {cmd}")
    # shell=True is kept on purpose: ``args`` arrives as one pre-joined
    # string and the shell is what splits it back into tokens.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    ) as proc:
        for line in proc.stdout:
            print(line.rstrip().decode("utf-8"))
        # Drain stderr while the pipe is still open, then reap the child:
        # ``returncode`` stays ``None`` until the process has been waited on.
        # NOTE(review): a child that writes a very large amount to stderr
        # before closing stdout can still block on the stderr pipe.
        stderr_lines = proc.stderr.readlines()
        returncode = proc.wait()
    if returncode != 0:
        logger.error(f"Error running {cmd}")
        for line in stderr_lines:
            logger.error(line.rstrip().decode("utf-8"))
        return 1
    return 0
# Maps each layer name to a ``(parser, main)`` pair. ``parser`` is ``None``
# for the layers whose module exports no argument parser in the imports above.
deckard_layer_dict = {
    "afr": (afr_parser, afr_main),
    "attack": (attack_parser, attack_main),
    "clean_data": (clean_data_parser, clean_data_main),
    "compile": (compile_parser, compile_main),
    "data": (data_parser, data_main),
    "experiment": (experiment_parser, experiment_main),
    "find_best": (find_best_parser, find_best_main),
    "generate_grid": (generate_grid_parser, generate_grid_main),
    "hydra_test": (None, hydra_test_main),
    "merge": (merge_parser, merge_main),
    "optimise": (None, optimise_main),
    "parse": (hydra_parser, parse_hydra_config),
    "plots": (plots_parser, plots_main),
    "prepare_queue": (None, prepare_queue_main),
    "query_kepler": (kepler_parser, kepler_main),
}
# Import-time consistency check between the advertised layer names and the
# dispatch table. Comparing names (not only lengths) catches typos and
# reports exactly which entries disagree.
_mismatched_layers = sorted(
    set(layer_list).symmetric_difference(deckard_layer_dict),
)
assert len(deckard_layer_dict) == len(layer_list) and not _mismatched_layers, (
    "Some layers are missing from the deckard_layer_dict"
    f" (mismatched names: {_mismatched_layers})"
)


def _echo_command(cmd):
    """Run ``cmd`` through the shell, echo its stdout, return its exit status."""
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) as proc:
        for line in proc.stdout:
            print(line.rstrip().decode("utf-8"))
    # Leaving the ``with`` block waits for the child, so returncode is set.
    return proc.returncode


def parse_and_repro(args, default_config="default.yaml", config_dir="conf"):
    """Write ``params.yaml`` from the configuration, then run ``dvc repro``.

    Args:
        args: Extra command-line text for ``deckard.layers.parse``. When it
            is empty the params file is written in-process through
            ``save_params_file`` instead of a subprocess.
        default_config: Configuration file name handed to the parse step.
        config_dir: Configuration directory handed to ``save_params_file``;
            a relative path is interpreted against the working directory.

    Returns:
        0 when every step succeeded, otherwise the non-zero exit status of
        the subprocess that failed.

    Raises:
        RuntimeError: ``save_params_file`` returned something other than
            ``None``.
        FileNotFoundError: ``params.yaml`` was not produced.
        ValueError: No ``dvc.yaml`` exists in the working directory.
    """
    if not args:
        # The call must not live inside an ``assert``: under ``python -O``
        # the whole statement, side effect included, would be skipped.
        outcome = save_params_file(
            config_dir=Path(config_dir),
            config_file=default_config,
        )
        if outcome is not None:
            raise RuntimeError(f"save_params_file returned {outcome!r}")
        if not Path("params.yaml").exists():
            raise FileNotFoundError("params.yaml was not written")
    else:
        cmd = f"python -m deckard.layers.parse {args} --config_file {default_config}"
        status = _echo_command(cmd)
        if status != 0:
            logger.error(f"Error running {cmd}")
            return status
    if not Path("dvc.yaml").exists():
        raise ValueError("No dvc.yaml file found. Please construct a pipeline.")
    status = _echo_command("dvc repro")
    if status != 0:
        logger.error("Error running dvc repro")
        return status
    return 0
def main(layer, args):
    """Parse the arguments for ``layer`` and call its registered entry point.

    Args:
        layer: Key of ``deckard_layer_dict`` selecting the layer to run.
        args: The layer's command-line arguments, either as a sequence of
            strings or as an object exposing that sequence as ``.args``.
            A leading program name (``sys.argv[0]``) is dropped, so
            ``sys.argv`` can be passed directly once the layer name has been
            removed from it.

    Raises:
        ValueError: ``layer`` is unknown, or it has no argument parser
            registered.
    """
    if layer not in deckard_layer_dict:
        raise ValueError(f"Layer {layer} not found.")
    parser, sub_main = deckard_layer_dict[layer]
    if parser is None:
        # NOTE(review): how parser-less layers expect to be invoked is not
        # visible from this module; fail with a clear message instead of an
        # AttributeError on ``None.parse_args`` -- TODO confirm and dispatch.
        raise ValueError(
            f"Layer {layer} has no argument parser registered and cannot be "
            "dispatched from this entry point.",
        )
    # Accept both a namespace-like object carrying ``.args`` and a plain list.
    argv = list(getattr(args, "args", args))
    # argparse must only see the layer's own arguments, not the program name.
    if argv and argv[0] == sys.argv[0]:
        argv = argv[1:]
    sub_main(parser.parse_args(argv))


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument(
"--submodule",
type=str,
help=f"Submodule to run. Choices: {layer_list}",
)
parser.add_argument(
"--config_file",
type=str,
help="default hydra configuration file that you would like to reproduce with dvc repro.",
)
parser.add_argument("--config_dir", type=str, default="conf")
parser.add_argument("other_args", type=str, nargs="*")
args = parser.parse_args()
submodule = args.submodule
if submodule is not None:
assert (
args.config_file is None
), "config_file and submodule cannot be specified at the same time"
if submodule not in layer_list and submodule is not None:
raise ValueError(f"Submodule {submodule} not found. Choices: {layer_list}")
if len(args.other_args) > 0:
other_args = " ".join(args.other_args)
else:
other_args = []
if submodule is None:
assert (
parse_and_repro(other_args, args.config_file, config_dir=args.config_dir)
== 0
)
else:
assert run_submodule(submodule, other_args) == 0
# pop the first argument which is the script name
layer = sys.argv.pop(1)
# pass the rest of the arguments to the main function
main(layer, sys.argv)
12 changes: 10 additions & 2 deletions deckard/layers/afr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@

logger = logging.getLogger(__name__)

__all__ = [
"afr_main",
"survival_probability_calibration",
"fit_aft",
"plot_aft",
"afr_parser",
]


# Modified from https://github.com/CamDavidsonPilon/lifelines/blob/master/lifelines/calibration.py
def survival_probability_calibration(
Expand Down Expand Up @@ -872,7 +880,7 @@ def calculate_raw_failures(args, data, config):
return data


def main(args):
def afr_main(args):
target = args.target
duration_col = args.duration_col
dataset = args.dataset
Expand Down Expand Up @@ -929,4 +937,4 @@ def main(args):
afr_parser.add_argument("--config_file", type=str, default="afr.yaml")
afr_parser.add_argument("--plots_folder", type=str, default="plots")
args = afr_parser.parse_args()
main(args)
afr_main(args)
26 changes: 14 additions & 12 deletions deckard/layers/clean_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,9 @@ def replace_strings_in_data(data, replace_dict):
v,
dict,
), f"Value for key {k} in replace_dict is not a dictionary."
assert k in data.columns, f"Key {k} not in data.columns."
if k not in data.columns:
logger.warning(f"Column {k} not in data. Ignoring.")
continue
for k1, v1 in v.items():
logger.info(f"Replacing {k1} with {v1} in {k}...")
k1 = str(k1)
Expand Down Expand Up @@ -610,41 +612,41 @@ def drop_values(data, drop_dict):
return data


parser = argparse.ArgumentParser()
parser.add_argument(
clean_data_parser = argparse.ArgumentParser()
clean_data_parser.add_argument(
"-i",
"--input_file",
type=str,
help="Data file to read from",
required=True,
)
parser.add_argument(
clean_data_parser.add_argument(
"-o",
"--output_file",
type=str,
help="Data file to read from",
required=True,
)
parser.add_argument(
clean_data_parser.add_argument(
"-v",
"--verbosity",
default="INFO",
help="Increase output verbosity",
)
parser.add_argument(
clean_data_parser.add_argument(
"-c",
"--config",
help="Path to the config file",
default="clean.yaml",
)
parser.add_argument(
clean_data_parser.add_argument(
"-s",
"--subset",
help="Subset of data you would like to plot",
default=None,
nargs="?",
)
parser.add_argument(
clean_data_parser.add_argument(
"-d",
"--drop_if_empty",
help="Drop row if this columns is empty",
Expand All @@ -656,14 +658,14 @@ def drop_values(data, drop_dict):
"predict_time",
],
)
parser.add_argument(
clean_data_parser.add_argument(
"--pareto_dict",
help="Path to (optional) pareto set dictionary.",
default=None,
)


def main(args):
def clean_data_main(args):
logging.basicConfig(level=args.verbosity)
assert Path(
args.input_file,
Expand Down Expand Up @@ -726,5 +728,5 @@ def main(args):


if __name__ == "__main__":
args = parser.parse_args()
main(args)
args = clean_data_parser.parse_args()
clean_data_main(args)
38 changes: 24 additions & 14 deletions deckard/layers/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
from tqdm import tqdm
import yaml
import argparse


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -172,13 +173,13 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
Path(results_folder).mkdir(exist_ok=True, parents=True)
suffix = results_file.suffix
if suffix == ".csv":
results = pd.read_csv(results_file)
results = pd.read_csv(results_file, index_col=0)
elif suffix == ".xlsx":
results = pd.read_excel(results_file)
results = pd.read_excel(results_file, index_col=0)
elif suffix == ".html":
results = pd.read_html(results_file)
results = pd.read_html(results_file, index_col=0)
elif suffix == ".json":
results = pd.read_json(results_file)
results = pd.read_json(results_file, index_col=0)
elif suffix == ".tex":
pd.read_csv(
results_file,
Expand All @@ -187,6 +188,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
skiprows=4,
skipfooter=3,
engine="python",
index_col=0,
)
else:
raise ValueError(f"File type {suffix} not supported.")
Expand All @@ -196,16 +198,7 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
return results


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--results_file", type=str, default="results.csv")
parser.add_argument("--report_folder", type=str, default="reports", required=True)
parser.add_argument("--results_folder", type=str, default=".")
parser.add_argument("--exclude", type=list, default=None, nargs="*")
parser.add_argument("--verbose", type=str, default="INFO")
args = parser.parse_args()
def compile_main(parse_results, save_results, args):
logging.basicConfig(level=args.verbose)
report_folder = args.report_folder
results_file = args.results_file
Expand All @@ -215,3 +208,20 @@ def load_results(results_file, results_folder) -> pd.DataFrame:
assert Path(
report_file,
).exists(), f"Results file {report_file} does not exist. Something went wrong."


# Command-line interface of the compile layer.
compile_parser = argparse.ArgumentParser()
compile_parser.add_argument("--results_file", type=str, default="results.csv")
# NOTE(review): ``required=True`` means the default below is never used.
compile_parser.add_argument(
    "--report_folder",
    type=str,
    default="reports",
    required=True,
)
compile_parser.add_argument("--results_folder", type=str, default=".")
# ``nargs="*"`` already collects the values into a list; ``type`` is applied
# to each value separately, so ``type=list`` would split every value into
# single characters.
compile_parser.add_argument("--exclude", type=str, default=None, nargs="*")
compile_parser.add_argument("--verbose", type=str, default="INFO")

if __name__ == "__main__":
    args = compile_parser.parse_args()
    compile_main(parse_results, save_results, args)
23 changes: 0 additions & 23 deletions deckard/layers/deploy.py

This file was deleted.

Loading
Loading