Patch for nkululeko 0.93.2: changelog entry, version bump, mandatory
PREDICT targets, CCC in the regression plot captions, "class_label" shown
under the real target name in plots, and a tolerant Util.get_model_type().

Review fixes folded into this revision of the patch:
  * plots.py: DataFrame.rename() returns a new frame by default, so its
    result is now assigned back to df. Before, the rename was discarded and
    the following df[self.target] lookups addressed a column that was never
    created.
  * plots.py: _plot2cont computed the CCC from the undefined names
    cont1/cont2; it now uses its own parameters col1/col2.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1831161..faf6f90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,10 @@
 Changelog
 =========
 
+Version 0.93.2
+--------------
+* changed class_label in plots to actual target
+
 Version 0.93.1
 --------------
 * made explore module more robust
diff --git a/nkululeko/constants.py b/nkululeko/constants.py
index 43b781d..c895628 100644
--- a/nkululeko/constants.py
+++ b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.93.1"
+VERSION="0.93.2"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/experiment.py b/nkululeko/experiment.py
index 89303d4..6a9ede5 100644
--- a/nkululeko/experiment.py
+++ b/nkululeko/experiment.py
@@ -434,7 +434,9 @@ def autopredict(self):
                 f"unknown augmentation selection specifier {sample_selection},"
                 " should be [all | train | test]"
             )
-        targets = self.util.config_val_list("PREDICT", "targets", ["gender"])
+        targets = self.util.config_val_list("PREDICT", "targets", None)
+        if targets is None:
+            self.util.error("no prediction target specified")
         for target in targets:
             if target == "speaker":
                 from nkululeko.autopredict.ap_sid import SIDPredictor
diff --git a/nkululeko/plots.py b/nkululeko/plots.py
index eeb0931..2143faa 100644
--- a/nkululeko/plots.py
+++ b/nkululeko/plots.py
@@ -8,6 +8,8 @@ import seaborn as sns
 
 from sklearn.manifold import TSNE
 
+from audmetric import concordance_cc as ccc
+
 import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.defines import Header
 from nkululeko.reporting.report_item import ReportItem
@@ -239,28 +241,55 @@ def _check_binning(self, att, df):
 
     def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
         """Plot relation of two continuous distributions with one categorical."""
+        # rename "class_label" to the original target; rename() returns a copy
+        if cont2 == "class_label":
+            df = df.rename(columns={cont2: self.target})
+            cont2 = self.target
+        if cont1 == "class_label":
+            df = df.rename(columns={cont1: self.target})
+            cont1 = self.target
+        if cat == "class_label":
+            df = df.rename(columns={cat: self.target})
+            cat = self.target
         pearson = stats.pearsonr(df[cont1], df[cont2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
+        ccc_val = ccc(df[cont1], df[cont2])
+        ccc_val = int(ccc_val * 1000) / 1000
+        ccc_string = f"CCC: {ccc_val}"
         ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
-        caption = f"{ylab} {df.shape[0]}. {pearson_string}"
+        caption = f"{ylab} {df.shape[0]}. {pearson_string} {ccc_string}"
         ax.figure.suptitle(caption)
         return ax, caption
 
     def _plot2cont(self, df, col1, col2, ylab):
         """Plot relation of two continuous distributions."""
+        # rename "class_label" to the original target; rename() returns a copy
+        if col2 == "class_label":
+            df = df.rename(columns={col2: self.target})
+            col2 = self.target
+        if col1 == "class_label":
+            df = df.rename(columns={col1: self.target})
+            col1 = self.target
         pearson = stats.pearsonr(df[col1], df[col2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
+        ccc_val = ccc(df[col1], df[col2])
+        ccc_val = int(ccc_val * 1000) / 1000
+        ccc_string = f"CCC: {ccc_val}"
         ax = sns.lmplot(data=df, x=col1, y=col2)
-        caption = f"{ylab} {df.shape[0]}. {pearson_string}"
+        caption = f"{ylab} {df.shape[0]}. {pearson_string} {ccc_string}"
         ax.figure.suptitle(caption)
         return ax, caption
 
     def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
         """Plot relation of categorical distribution with continuous."""
+        # rename "class_label" to the original target; rename() returns a copy
+        if cat_col == "class_label":
+            df = df.rename(columns={cat_col: self.target})
+            cat_col = self.target
         dist_type = self.util.config_val("EXPL", "dist_type", "kde")
         cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
         model_type = self.util.get_model_type()
diff --git a/nkululeko/utils/util.py b/nkululeko/utils/util.py
index 0b46986..2ea1701 100644
--- a/nkululeko/utils/util.py
+++ b/nkululeko/utils/util.py
@@ -226,7 +226,10 @@ def get_target_name(self):
         return self.config["DATA"]["target"]
 
     def get_model_type(self):
-        return self.config["MODEL"]["type"]
+        try:
+            return self.config["MODEL"]["type"]
+        except KeyError:
+            return ""
 
     def get_model_description(self):
         mt = ""