From 5aaa4baa176ba710c219b17529eb67cdc2cad257 Mon Sep 17 00:00:00 2001 From: Ivan Knyazev <105492484+IRKnyazev@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:19:19 +0700 Subject: [PATCH] [BUG] Short term solution for shapelet feature importance (#2017) * partial fixed to issue * added a warning message for rdst combined with linear classifiers --- aeon/visualisation/estimator/_shapelets.py | 31 +++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/aeon/visualisation/estimator/_shapelets.py b/aeon/visualisation/estimator/_shapelets.py index 847a96381d..1feeb3ee04 100644 --- a/aeon/visualisation/estimator/_shapelets.py +++ b/aeon/visualisation/estimator/_shapelets.py @@ -5,6 +5,7 @@ __all__ = ["ShapeletClassifierVisualizer", "ShapeletTransformerVisualizer"] import copy +import warnings import numpy as np from sklearn.ensemble._forest import BaseForest @@ -681,13 +682,35 @@ def _get_shp_importance(self, class_id): if isinstance(classifier, Pipeline): classifier = classifier[-1] - # This suppose that the higher the coef linked to each feature, the most - # impact this feature makes on classification for the given class_id + # This supposes that the higher (with the exception of distance features) + # the coef linked to each feature, the most impact this feature makes on + # classification for the given class_id if isinstance(classifier, LinearClassifierMixin): coefs = classifier.coef_ n_classes = coefs.shape[0] if n_classes == 1: - coefs = np.append(-coefs, coefs, axis=0) + if isinstance(self.estimator, RDSTClassifier): + class_0_coefs = np.copy(coefs) + class_1_coefs = np.copy(coefs) + + mask = np.ones(class_0_coefs.shape[1], dtype=bool) + mask[::3] = False + class_0_coefs[:, mask] = -class_0_coefs[:, mask] + class_1_coefs[:, ::3] = -class_1_coefs[:, ::3] + + # Append the two modified coefs arrays along axis 0 + coefs = np.append(class_0_coefs, class_1_coefs, axis=0) + warnings.warn( + "Shapelet importance ranking may be unreliable " + "when using linear classifiers with RDST. " + "This is due to the interaction between argmin " + "and shapelet occurrence features, which can distort " + "the rankings. Consider evaluating the results carefully " + "or using an alternative method.", + stacklevel=1, + ) + else: + coefs = np.append(coefs, -coefs, axis=0) coefs = coefs[class_id] elif isinstance(classifier, (BaseForest, BaseDecisionTree)): @@ -699,7 +722,7 @@ def _get_shp_importance(self, class_id): "classifier inheriting from LinearClassifierMixin, BaseForest or " f"BaseDecisionTree but got {type(classifier)}" ) - # coefs = coefs[idx] + if isinstance(self.estimator, RDSTClassifier): # As each shapelet generate 3 features, divide feature id by 3 so all # features generated by one shapelet share the same ID