diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
new file mode 100644
index 00000000..8df55050
--- /dev/null
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -0,0 +1,392 @@
+import inspect
+import json
+import os
+import random
+import gin
+import torch
+import logging
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import scipy.stats as stats
+from sklearn.metrics import log_loss
+from skopt import gp_minimize
+
+from icu_benchmarks.data.loader import PredictionDataset
+from icu_benchmarks.data.preprocessor import Preprocessor, DefaultClassificationPreprocessor
+from icu_benchmarks.data.split_process_data import preprocess_data
+from icu_benchmarks.tuning.hyperparameters import choose_and_bind_hyperparameters
+from icu_benchmarks.models.train import train_common
+from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper
+from icu_benchmarks.models.utils import JsonResultLoggingEncoder
+from icu_benchmarks.run_utils import log_full_line
+from .constants import MLMetrics
+
+
+def load_model(model_dir: Path, log_dir: Path):
+    """Load model from gin config."""
+    gin.parse_config_file(model_dir / "train_config.gin")
+    model_type = gin.query_parameter("train_common.model")
+    if str(model_type) == "@DLWrapper()":
+        model = DLWrapper()
+    elif str(model_type) == "@MLWrapper()":
+        model = MLWrapper()
+    model.set_log_dir(log_dir)
+    if (model_dir / "model.torch").is_file():
+        model.load_weights(model_dir / "model.torch")
+    elif (model_dir / "model.txt").is_file():
+        model.load_weights(model_dir / "model.txt")
+    elif (model_dir / "model.joblib").is_file():
+        model.load_weights(model_dir / "model.joblib")
+    else:
+        raise Exception("No weights to load at path : {}".format(model_dir / "model.*"))
+    return model
+
+
+def get_predictions_for_single_model(dataset: PredictionDataset, model_dir: Path, log_dir: Path):
+    """Get predictions for a single model.
+
+    Args:
+        dataset: Dataset to get predictions for.
+        model_dir: Path to directory where model weights are stored.
+        log_dir: Path to directory where model output should be saved.
+
+    Returns:
+        Predictions of the loaded model on the dataset.
+    """
+    model = load_model(model_dir, log_dir)
+    return model.predict(dataset, None, None)
+
+
+def calculate_metrics(predictions: np.ndarray, labels: np.ndarray):
+    """Compute all binary classification metrics for the given predictions and labels."""
+    metric_results = {}
+    for name, metric in MLMetrics.BINARY_CLASSIFICATION.items():
+        value = metric(labels, predictions)
+        metric_results[name] = value
+        # Only log float values
+        # if isinstance(value, np.float):
+        #     logging.info("Test {}: {}".format(name, value))
+    return metric_results
+
+
+def get_predictions_for_all_models(
+    target_model: object,
+    data: dict[str, pd.DataFrame],
+    log_dir: Path,
+    source_dir: Path = None,
+    seed: int = 1234,
+    reproducible: bool = True,
+    test_on: str = "test",
+    source_datasets: list = None,
+):
+    """Get predictions on one split from the target model and from every source model.
+
+    Args:
+        target_model: Model trained on the target dataset.
+        data: Dict containing the data to predict on.
+        log_dir: Path to directory where model output should be saved.
+        source_dir: Path to the directory containing the trained source models.
+        seed: Common seed used for any random operation.
+        reproducible: If set to true, set torch to run reproducibly.
+        test_on: Name of the data split to predict on.
+        source_datasets: Names of the source datasets whose models should be loaded.
+
+    Returns:
+        Tuple of a dict mapping model names to predictions and the labels of the split.
+ """ + # Setting the seed before gin parsing + os.environ["PYTHONHASHSEED"] = str(seed) + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + if reproducible: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" + torch.use_deterministic_algorithms(True) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + test_dataset = PredictionDataset(data, split=test_on) + _, test_labels = test_dataset.get_data_and_labels() + + test_predictions = {} + test_predictions["target"] = target_model.predict(test_dataset, None, None) + for source in source_datasets: + model_dir = source_dir / source + test_predictions[model_dir.name] = get_predictions_for_single_model(test_dataset, model_dir, log_dir) + + for name, prediction in test_predictions.items(): + if isinstance(target_model, MLWrapper) and prediction.ndim == 2: + test_predictions[name] = prediction[:, 1] + + return test_predictions, test_labels + + +def domain_adaptation( + dataset: str, + data_dir: Path, + run_dir: Path, + seed: int, + task: str = None, + model: str = None, + debug: bool = False, +): + """Choose hyperparameters to tune and bind them to gin. + + Args: + data_dir: Path to the data directory. + run_dir: Path to the log directory. + seed: Random seed. + n_initial_points: Number of initial points to explore. + n_calls: Number of iterations to optimize the hyperparameters. + folds_to_tune_on: Number of folds to tune on. + debug: Whether to load less data and enable more logging. + + Raises: + ValueError: If checkpoint is not None and the checkpoint does not exist. + """ + is_sepsis = task == "sepsis" + cv_repetitions = 5 + cv_repetitions_to_train = 5 + cv_folds = 5 + cv_folds_to_train = 5 + target_sizes = [500, 1000, 2000] + datasets = ["aumc", "hirid", "miiv"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"] + # old_run_dir = Path("../yaib_logs/DA_sep") + old_run_dir = Path("../DA_seps") if is_sepsis else Path("../DA_new") + task_dir = data_dir / task + model_path = Path("../yaib_models/best_models/") + + gin_config_before_tuning = gin.config_str() + + preprocessor = preprocessor(use_static_features=True) + + # evaluate models on same test split + data_dir = task_dir / dataset + source_datasets = [d for d in datasets if d != dataset] + log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) + for target_size in target_sizes: + gin.clear_config() + gin.parse_config(gin_config_before_tuning) + log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) + gin.bind_parameter("preprocess.fold_size", target_size) + log_dir = run_dir / task / model / dataset / f"target_{target_size}" + log_dir.mkdir(parents=True, exist_ok=True) + target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" + if not (target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin").exists(): + choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + else: + gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin") + gin_config_with_target_hyperparameters = gin.config_str() + results = {} + loss_weighted_results = [] + for repetition in range(cv_repetitions_to_train): + loss_weighted_results.append({}) + agg_val_losses = [] + for fold_index in range(cv_folds_to_train): + gin.parse_config(gin_config_with_target_hyperparameters) + results[f"{repetition}_{fold_index}"] = {} + fold_results = results[f"{repetition}_{fold_index}"] + + data = preprocessor.apply(data, vars) + + log_dir_fold = log_dir / 
f"cv_rep_{repetition}" / f"fold_{fold_index}" + log_dir_fold.mkdir(parents=True, exist_ok=True) + + # load or train target model + target_model_dir_fold = target_model_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" + try: + target_model = load_model(target_model_dir_fold, log_dir_fold) + except: + logging.info("Model not found, training new model.") + target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + + def get_preds(split): + if not (log_dir_fold / f"{split}_predictions.json").exists(): + predictions, labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + test_on=split, + ) + with open(log_dir_fold / f"{split}_predictions.json", "w") as f: + json.dump(predictions, f, cls=JsonResultLoggingEncoder) + else: + with open(log_dir_fold / f"{split}_predictions.json", "r") as f: + predictions = json.load(f) + _, labels = PredictionDataset(data, split=split).get_data_and_labels() + return predictions, labels + + # get predictions for train set + train_predictions, train_labels = get_preds("train") + test_predictions, test_labels = get_preds("test") + val_predictions, val_labels = get_preds("val") + val_losses = {baseline: log_loss(val_labels, predictions) for baseline, predictions in val_predictions.items()} + val_losses["target"] = log_loss(val_labels, val_predictions["target"]) + # logging.info("Validation AUCS: %s", val_aucs) + # logging.info("Validation losses: %s", val_losses) + agg_val_losses.append(val_losses) + + # evaluate baselines + for baseline, predictions in test_predictions.items(): + # logging.info("Evaluating model: {}".format(baseline)) + fold_results[baseline] = calculate_metrics(predictions, test_labels) + + # evaluate convex combination of models without target + test_predictions_list = list(test_predictions.values()) + test_predictions_list_without_target = test_predictions_list[1:] + weights_without_target = [1, 1] if is_sepsis else [1, 1, 1] + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights_without_target) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + + # evaluate max probability + max_pred = np.max(test_predictions_list, axis=0) + fold_results[f"max_prediction"] = calculate_metrics(max_pred, test_labels) + + # evaluate convex combination of models with target + weights = { + "aumc": 10535, + "eicu": 113382, + "hirid": 12859, + "miiv": 52045, + } + weights_without_target = [v for k, v in weights.items() if k in source_datasets] + target_weights = [0.5, 1, 2] + for t in target_weights: + w = [t * sum(weights_without_target)] + weights_without_target + # logging.info(f"Evaluating target weight: {t}") + test_pred = np.average(test_predictions_list, axis=0, weights=w) + fold_results[f"target_weight_{t}"] = calculate_metrics(test_pred, test_labels) + + # join predictions with static data and train new model + gin.clear_config() + gin.parse_config(gin_config_with_target_hyperparameters) + gin.bind_parameter("Transformer.emb", 103) + gin.bind_parameter("LSTMNet.input_dim", 103) + gin.bind_parameter("preprocess.fold_size", target_size) + data_with_predictions = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=True, + cv_repetitions=cv_repetitions, + repetition_index=repetition, + cv_folds=cv_folds, + fold_index=fold_index, + ) + data_with_predictions["train"]["STATIC"] = 
data_with_predictions["train"]["STATIC"].join( + pd.DataFrame(list(train_predictions.values())[1:]).T + ) + data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join( + pd.DataFrame(list(val_predictions.values())[1:]).T + ) + data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join( + pd.DataFrame(list(test_predictions.values())[1:]).T + ) + model_type = gin.query_parameter("train_common.model") + if str(model_type) == "@DLWrapper()": + target_model_with_predictions = DLWrapper() + elif str(model_type) == "@MLWrapper()": + target_model_with_predictions = MLWrapper() + target_model_with_predictions.set_log_dir(log_dir_fold) + target_model_with_predictions.train( + PredictionDataset(data_with_predictions, split="train"), + PredictionDataset(data_with_predictions, split="val"), + "balanced", + seed, + ) + dataset_with_predictions = PredictionDataset(data_with_predictions, split="test") + preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) + if isinstance(target_model_with_predictions, MLWrapper): + preds_w_preds = preds_w_preds[:, 1] + fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) + test_pred_with_preds = np.average( + [preds_w_preds] + test_predictions_list_without_target, + axis=0, + weights=[0.5 * sum(weights_without_target)] + weights_without_target, + ) + fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) + + def convex_model_combination(model_weights): + val_pred = np.average(list(val_predictions.values()), axis=0, weights=model_weights) + return log_loss(val_labels, val_pred) + + logging.disable(logging.INFO) + res = gp_minimize( + convex_model_combination, + [(0.01, 1)] * len(datasets), + n_calls=50, + n_initial_points=10, + random_state=seed, + noise=1e-10, # the models are deterministic, but noise is needed for the gp to work + ) + logging.disable(logging.NOTSET) + best_model_weights = res.x + logging.info(best_model_weights) + test_pred = np.average(test_predictions_list, axis=0, weights=best_model_weights) + fold_results["bayes_opt"] = calculate_metrics(test_pred, test_labels) + + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + + # average results over folds + agg_aucs = {} + for fold_results in results.values(): + for source, metrics in fold_results.items(): + agg_aucs.setdefault(source, []).append(metrics["AUC"]) + + avg_aucs = {} + for source, aucs in agg_aucs.items(): + avg_aucs[source] = np.mean(aucs) + + avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()]) + logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) + + weights = 1 / avg_val_losses + logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + loss_weighted_results[repetition]["loss_weighted"] = calculate_metrics(test_pred, test_labels) + avg_aucs["loss_weighted"] = loss_weighted_results[repetition]["loss_weighted"]["AUC"] + + weights = (1 / avg_val_losses) ** 2 + logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + loss_weighted_results[repetition]["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels) + avg_aucs["squared_loss_weighted"] = loss_weighted_results[repetition]["squared_loss_weighted"]["AUC"] + + # print baselines first, then top three AUC, then top three loss + for source, auc in avg_aucs.items(): + 
logging.info(f"{source}: {auc}") + + log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) + + source_metrics = {} + for result in results.values(): + for source, source_stats in result.items(): + for metric, score in source_stats.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) + for loss_weighted_result in loss_weighted_results: + for source, source_stats in loss_weighted_result.items(): + for metric, score in source_stats.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) + + # Compute statistical metric over aggregated results + averaged_metrics = {} + for source, source_stats in source_metrics.items(): + for metric, scores in source_stats.items(): + averaged_metrics.setdefault(source, {}).setdefault(metric, []).append( + { + "avg": np.mean(scores), + "std": np.std(scores), + "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), + } + ) + + with open(log_dir / "aggregated_source_metrics.json", "w") as f: + json.dump(results, f, cls=JsonResultLoggingEncoder) + + with open(log_dir / "averaged_source_metrics.json", "w") as f: + json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder) + + logging.info(f"Averaged results: {averaged_metrics}") + log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) + + log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=5) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 0f8b7859..0a47cf7b 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -22,6 +22,7 @@ setup_logging, ) from icu_benchmarks.contants import RunMode +from icu_benchmarks.models import domain_adaptation @gin.configurable("Run") @@ -116,6 +117,15 @@ def main(my_args=tuple(sys.argv[1:])): run_dir = create_run_dir(log_dir) source_dir = args.source_dir gin.parse_config_file(source_dir / "train_config.gin") + if args.command == "da": + gin_config_files = ( + [Path(f"configs/experiments/{args.experiment}.gin")] + if args.experiment + else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] + ) + gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) + domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model, debug=args.debug) + return else: # Train checkpoint = log_dir / args.checkpoint if args.checkpoint else None diff --git a/icu_benchmarks/run_utils.py b/icu_benchmarks/run_utils.py index 179ab1f5..07575bd7 100644 --- a/icu_benchmarks/run_utils.py +++ b/icu_benchmarks/run_utils.py @@ -90,6 +90,9 @@ def build_parser() -> ArgumentParser: evaluate.add_argument("-sn", "--source-name", required=True, type=Path, help="Name of the source dataset.") evaluate.add_argument("--source-dir", required=True, type=Path, help="Directory containing gin and model weights.") + # DOMAIN ADAPTATION ARGUMENTS + prep_and_train = subparsers.add_parser("da", help="Run DA experiment.", parents=[parent_parser]) + return parser diff --git a/scripts/results/da_results_to_latex.py b/scripts/results/da_results_to_latex.py new file mode 100644 index 00000000..a2e8cbd6 --- /dev/null +++ b/scripts/results/da_results_to_latex.py @@ -0,0 +1,69 @@ +import csv + +rawNamesMap = { + "target": "Target", + "aumc": "AUMCdb", + "eicu": "eICU", + "hirid": "HiRID", + "miiv": "MIMIC-IV", + "convex_combination_without_target": "Convex UDA", + 
"max_prediction": "Max Pooling", + "target_weight_0.5": "Weighted $\\alpha=1/3$", + "target_weight_2": "Weighted $\\alpha=2/3$", + "loss_weighted": "Weighted Loss", + "bayes_opt": "Weighted Bayes", + "target_with_predictions": "Prediction-Feature", + "cc_with_preds": "Combined", +} + +def csv_to_dict(file_name): + with open(file_name, 'r') as file: + reader = csv.DictReader(file) + data = [row for row in reader] + tables = {} + for row in data: + row_without_target = {key: value for key, value in row.items() if key != 'target' and key != 'target_size' and key != 'model'} + tables.setdefault((row['target'], row['target_size']), {})[row['model']] = row_without_target + return tables + + +def dict_to_latex(combination, data, metric): + table = '\\begin{table}[h]\n' + table += '\\centering\n' + table += '\\footnotesize' + table += '\\caption{{Sepsis prediction on {0} with target size {1}, {2} with standard deviation.}}\n'.format(rawNamesMap[combination[0]], combination[1], "AUROC" if metric == "auc" else "AUPRC") + headers = ['Model'] + for model, scores in data.items(): + headers += [model] + + table += '\\begin{tabular}{l|' + ''.join(['c'] * (len(headers) - 1)) + '}\n' + table += '\\textbf{' + '} & \\textbf{'.join(headers) + '}\\\\\n' + table += '\\hline\n' + + for score_name, score in data[model].items(): + if "_avg" in score_name: + raw_name = score_name.split("_avg")[0] + if raw_name == combination[0] or not raw_name in rawNamesMap: + continue + clean_name = rawNamesMap[raw_name] + values = [clean_name] + for model in headers[1:]: + scores = data[model] + avg = "{:.2f}".format(float(scores[score_name])) + std = "{:.2f}".format(float(scores[f"{raw_name}_std"])) + values.append(f"${avg} \pm {std}$") + table += ' & '.join(values) + '\\\\\n' + + table += '\\end{tabular}\n' + table += '\\end{table}\n' + return table + +if __name__ == '__main__': + for metric in ["auc", "pr"]: + file_name = f'../yaib_logs/sep_{metric}.csv' + data = csv_to_dict(file_name) + for key, row in data.items(): + table = dict_to_latex(key, row, metric) + print(table) + print('\n' * 5) + diff --git a/scripts/results/da_results_to_latex_sep.py b/scripts/results/da_results_to_latex_sep.py new file mode 100644 index 00000000..c0c0aafe --- /dev/null +++ b/scripts/results/da_results_to_latex_sep.py @@ -0,0 +1,69 @@ +import csv + +rawNamesMap = { + "target": "Target", + "aumc": "AUMCdb", + "eicu": "eICU", + "hirid": "HiRID", + "miiv": "MIMIC-IV", + "convex_combination_without_target": "Convex UDA", + "max_prediction": "Max Pooling", + "target_weight_0.5": "Weighted $\\alpha=1/3$", + "target_weight_2": "Weighted $\\alpha=2/3$", + "loss_weighted": "Weighted Loss", + "bayes_opt": "Weighted Bayes", + "target_with_predictions": "Prediction-Feature", + "cc_with_preds": "Combined", +} + +def csv_to_dict(file_name): + with open(file_name, 'r') as file: + reader = csv.DictReader(file) + data = [row for row in reader] + tables = {} + for row in data: + row_without_target = {key: value for key, value in row.items() if key != 'target' and key != 'target_size' and key != 'model'} + tables.setdefault((row['target']), {})[row['target_size']] = row_without_target + return tables + + +def dict_to_latex(combination, data, metric): + table = '\\begin{table}[h]\n' + table += '\\centering\n' + table += '\\footnotesize' + table += '\\caption{{Sepsis prediction on {0} with LGBM, {1} with standard deviation.}}\n'.format(rawNamesMap[combination], "AUROC" if metric == "auc" else "AUPRC") + headers = ['Target Size'] + for target_size, scores in 
+        headers += [target_size]
+
+    table += '\\begin{tabular}{l|' + ''.join(['c'] * (len(headers) - 1)) + '}\n'
+    table += '\\textbf{' + '} & \\textbf{'.join(headers) + '}\\\\\n'
+    table += '\\hline\n'
+
+    for score_name, score in data[target_size].items():
+        if "_avg" in score_name:
+            raw_name = score_name.split("_avg")[0]
+            if raw_name == combination or raw_name not in rawNamesMap:
+                continue
+            clean_name = rawNamesMap[raw_name]
+            values = [clean_name]
+            for target_size in headers[1:]:
+                scores = data[target_size]
+                avg = "{:.2f}".format(float(scores[score_name]))
+                std = "{:.2f}".format(float(scores[f"{raw_name}_std"]))
+                values.append(f"${avg} \\pm {std}$")
+            table += ' & '.join(values) + '\\\\\n'
+
+    table += '\\end{tabular}\n'
+    table += '\\end{table}\n'
+    return table
+
+if __name__ == '__main__':
+    for metric in ["auc", "pr"]:
+        file_name = f'../yaib_logs/sep_{metric}.csv'
+        data = csv_to_dict(file_name)
+        for key, row in data.items():
+            table = dict_to_latex(key, row, metric)
+            print(table)
+            print('\n' * 5)
+
diff --git a/scripts/results/mortality_to_csv.py b/scripts/results/mortality_to_csv.py
new file mode 100644
index 00000000..e380e578
--- /dev/null
+++ b/scripts/results/mortality_to_csv.py
@@ -0,0 +1,53 @@
+import json
+from pathlib import Path
+import csv
+
+models_dir = Path("../DA_new")
+for metric in ["AUC", "PR"]:
+    for endpoint in models_dir.iterdir():
+        if endpoint.is_dir():
+            with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w") as csv_file:
+                writer = csv.writer(csv_file)
+                info = ["model", "target", "target_size"]
+                source_names = [
+                    "target",
+                    "aumc",
+                    "eicu",
+                    "hirid",
+                    "miiv",
+                    "convex_combination_without_target",
+                    "max_prediction",
+                    "target_weight_0.5",
+                    "target_weight_1",
+                    "target_weight_2",
+                    "loss_weighted",
+                    "squared_loss_weighted",
+                    "bayes_opt",
+                    "target_with_predictions",
+                    "cc_with_preds",
+                ]
+                stats_basis = ["avg", "std"]
+                stats = ["avg", "std"]
+                # combine fieldnames and stats
+                full_fields = [f"{source}_{stat}" for source in source_names for stat in stats]
+                writer = csv.DictWriter(csv_file, fieldnames=info + full_fields)
+
+                writer.writeheader()
+                for model in endpoint.iterdir():
+                    for target in ["aumc", "eicu", "hirid", "miiv"]:
+                        target_sizes = [500, 1000, 2000]
+                        for target_size in target_sizes:
+                            target_str = f"target_{target_size}"
+                            if (model / target / target_str).exists():
+                                with open(model / target / target_str / "averaged_source_metrics.json", "r") as f:
+                                    results = json.load(f)
+
+                                row_data = {"model": model.name, "target": target, "target_size": target_size}
+                                for stat in stats_basis:
+                                    for source, source_metrics in results.items():
+                                        if stat == "CI_0.95":
+                                            row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100
+                                            row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100
+                                        else:
+                                            row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100
+                                writer.writerow(row_data)
diff --git a/scripts/results/sepsis_to_csv.py b/scripts/results/sepsis_to_csv.py
new file mode 100644
index 00000000..813c3aeb
--- /dev/null
+++ b/scripts/results/sepsis_to_csv.py
@@ -0,0 +1,52 @@
+import json
+from pathlib import Path
+import csv
+
+models_dir = Path("../DA_sep_new")
+for metric in ["AUC", "PR"]:
+    for endpoint in models_dir.iterdir():
+        if endpoint.is_dir():
+            with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w") as csv_file:
+                writer = csv.writer(csv_file)
+                info = ["model", "target", "target_size"]
+                source_names = [
+                    "target",
+                    "aumc",
+                    "hirid",
"miiv", + "convex_combination_without_target", + "max_prediction", + "target_weight_0.5", + "target_weight_1", + "target_weight_2", + "loss_weighted", + "squared_loss_weighted", + "bayes_opt", + "target_with_predictions", + "cc_with_preds", + ] + stats_basis = ["avg", "std"] + stats = ["avg", "std"] + # combine fieldnames and stats + full_fields = [f"{source}_{stat}" for source in source_names for stat in stats] + writer = csv.DictWriter(csv_file, fieldnames=info + full_fields) + + writer.writeheader() + for model in endpoint.iterdir(): + for target in ["aumc", "hirid", "miiv"]: + target_sizes = [500, 1000, 2000] + for target_size in target_sizes: + target_str = f"target_{target_size}" + if (model / target / target_str).exists(): + with open(model / target / target_str / "averaged_source_metrics.json", "r") as f: + results = json.load(f) + + row_data = {"model": model.name, "target": target, "target_size": target_size} + for stat in stats_basis: + for source, source_metrics in results.items(): + if stat == "CI_0.95": + row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100 + row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100 + else: + row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100 + writer.writerow(row_data)