Skip to content

Commit

Permalink
[BUG] Short term solution for shapelet feature importance (#2017)
Browse files Browse the repository at this point in the history
* partial fix for the issue

* added a warning message for rdst combined with linear classifiers
  • Loading branch information
IRKnyazev authored Aug 28, 2024
1 parent 75e8ef5 commit 5aaa4ba
Showing 1 changed file with 27 additions and 4 deletions.
31 changes: 27 additions & 4 deletions aeon/visualisation/estimator/_shapelets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
__all__ = ["ShapeletClassifierVisualizer", "ShapeletTransformerVisualizer"]

import copy
import warnings

import numpy as np
from sklearn.ensemble._forest import BaseForest
Expand Down Expand Up @@ -681,13 +682,35 @@ def _get_shp_importance(self, class_id):
if isinstance(classifier, Pipeline):
classifier = classifier[-1]

# This suppose that the higher the coef linked to each feature, the most
# impact this feature makes on classification for the given class_id
# This assumes that the higher the coef linked to a feature (with the
# exception of distance features), the more impact that feature has on
# classification for the given class_id
if isinstance(classifier, LinearClassifierMixin):
coefs = classifier.coef_
n_classes = coefs.shape[0]
if n_classes == 1:
coefs = np.append(-coefs, coefs, axis=0)
if isinstance(self.estimator, RDSTClassifier):
class_0_coefs = np.copy(coefs)
class_1_coefs = np.copy(coefs)

mask = np.ones(class_0_coefs.shape[1], dtype=bool)
mask[::3] = False
class_0_coefs[:, mask] = -class_0_coefs[:, mask]
class_1_coefs[:, ::3] = -class_1_coefs[:, ::3]

# Append the two modified coefs arrays along axis 0
coefs = np.append(class_0_coefs, class_1_coefs, axis=0)
warnings.warn(
"Shapelet importance ranking may be unreliable "
"when using linear classifiers with RDST. "
"This is due to the interaction between argmin "
"and shapelet occurrence features, which can distort "
"the rankings. Consider evaluating the results carefully "
"or using an alternative method.",
stacklevel=1,
)
else:
coefs = np.append(coefs, -coefs, axis=0)
coefs = coefs[class_id]

elif isinstance(classifier, (BaseForest, BaseDecisionTree)):
Expand All @@ -699,7 +722,7 @@ def _get_shp_importance(self, class_id):
"classifier inheriting from LinearClassifierMixin, BaseForest or "
f"BaseDecisionTree but got {type(classifier)}"
)
# coefs = coefs[idx]

if isinstance(self.estimator, RDSTClassifier):
# As each shapelet generates 3 features, divide the feature id by 3 so that
# all features generated by one shapelet share the same ID
Expand Down

0 comments on commit 5aaa4ba

Please sign in to comment.