Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docstrings for ShaRP methods #54

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 140 additions & 9 deletions sharp/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ def __init__(
self._y = kwargs["y"] if "y" in kwargs.keys() else None

def fit(self, X, y=None):
"""
Fit a ShaRP model to the given data.

Parameters
----------
X: array-like, shape (n_samples, n_features)
Reference dataset used to compute explanations.
y: array-like, shape (n_samples,), default=None
Target variable.
"""
X_, y_ = check_inputs(X, y)

self._X = X_
Expand Down Expand Up @@ -106,7 +116,31 @@ def fit(self, X, y=None):

def individual(self, sample, X=None, y=None, **kwargs):
"""
set_cols_idx should be passed in kwargs if measure is marginal
Provides an explanation for an individual sample point based on a reference dataset.

.. note:: set_cols_idx should be passed in kwargs if measure is marginal

Parameters
----------
sample : array-like, shape (n_features,) or int
Sample to calculate explanation for.
Can be passed directly or as an index in a reference dataset.
X : array-like, shape (n_samples, n_features), default=None
Reference dataset used to compute explanations.
y : array-like, shape (n_samples,), default=None
Target variable.
set_cols_idx : 1D array-like, default=None
Features in the coalition used to construct composite points to estimate
feature importance.
coalition_size : int, default=n_features-1
Maximum number of features used during the construction of composite points.
sample_size : int, default=n_samples
Maximum number of samples used during the construction of composite points.

Returns
-------
1D array-like, shape (n_features,)
Influences of each feature on individual sample.
"""
if X is None:
X = self.qoi_.X
Expand Down Expand Up @@ -157,7 +191,31 @@ def individual(self, sample, X=None, y=None, **kwargs):

def feature(self, feature, X=None, y=None, **kwargs):
"""
set_cols_idx should be passed in kwargs if measure is marginal
Provides an explanation for all sample points for a specified feature
based on a reference dataset.

.. note:: set_cols_idx should be passed in kwargs if measure is marginal

Parameters
----------
feature : str or int
Name or index of the targeted feature
X : array-like, shape (n_samples, n_features), default=None
Reference dataset used to compute explanations.
y : array-like, shape (n_samples,), default=None
Target variable.
set_cols_idx : 1D array-like, default=None
Features in the coalition used to construct composite points to estimate
feature importance.
coalition_size : int, default=n_features-1
Maximum number of features used during the construction of composite points.
sample_size : int, default=n_samples
Maximum number of samples used during the construction of composite points.

Returns
-------
float
Average contributions of a specific feature along all sample points.
"""
X_, y_ = check_inputs(X, y)

Expand Down Expand Up @@ -204,7 +262,30 @@ def feature(self, feature, X=None, y=None, **kwargs):

def all(self, X=None, y=None, **kwargs):
"""
set_cols_idx should be passed in kwargs if measure is marginal
Provides an explanation for all sample points based on a reference dataset.

.. note:: set_cols_idx should be passed in kwargs if measure is marginal

Parameters
----------
X : array-like, shape (n_samples, n_features), default=None
Reference dataset used to compute explanations.
y : array-like, shape (n_samples,), default=None
Target variable.
set_cols_idx : 1D array-like, default=None
Features in the coalition used to construct composite points to estimate
feature importance.
coalition_size : int, default=n_features-1
Maximum number of features used during the construction of composite points.
sample_size : int, default=n_samples
Maximum number of samples used during the construction of composite points.

Returns
-------
array-like, shape (n_samples, n_features)
Contribution of each feature to a point's qoi
"""
X_ref = self._X if self._X is not None else check_inputs(X)[0]
X_, y_ = check_inputs(X, y)
Expand All @@ -223,10 +304,33 @@ def all(self, X=None, y=None, **kwargs):
def pairwise(self, sample1, sample2, **kwargs):
"""
Compare two samples, or one sample against a set of samples.
If `sample1` or `sample2` are of type `int` or `list`, `X` also needs to be
passed.

set_cols_idx should be passed in kwargs if measure is marginal
If `sample1` or `sample2` are of type `int` or `list`, `X` also needs
to be passed.

.. note:: set_cols_idx should be passed in kwargs if measure is marginal

Parameters
----------
sample1 : array-like or int or list
Sample or indices of samples that are used to calculate contributions.
sample2 : array-like or int or list
Sample or indices of samples against which contributions are calculated.
X : array-like, shape (n_samples, n_features), default=None
Reference dataset used to compute explanations.
y : array-like, shape (n_samples,), default=None
Target variable.
set_cols_idx : 1D array-like, default=None
Features in the coalition used to construct composite points to estimate
feature importance.
coalition_size : int, default=n_features-1
Maximum number of features used during the construction of composite points.
sample_size : int, default=n_samples
Maximum number of samples used during the construction of composite points.

Returns
-------
array-like
Contributions of each feature to each `sample1` point's qoi
"""
if "X" in kwargs.keys():
X = kwargs["X"]
Expand Down Expand Up @@ -267,8 +371,35 @@ def pairwise(self, sample1, sample2, **kwargs):

def pairwise_set(self, samples1, samples2, **kwargs):
"""
set_cols_idx should be passed in kwargs if measure is marginal
pairs is a list of tuples of indexes
Pairwise comparison of two sample sets.

.. note:: if elements of `samples1` or `samples2` are of type `int` or `list`,
`X` also needs to be passed.
.. note:: set_cols_idx should be passed in kwargs if measure is marginal

Parameters
----------
samples1 : array-like
Set of samples or indices that are used to calculate contributions.
samples2 : array-like
Set of samples or indices against which contributions are calculated.
X : array-like, shape (n_samples, n_features), default=None
Reference dataset used to compute explanations.
y : array-like, shape (n_samples,), default=None
Target variable.
set_cols_idx : 1D array-like, default=None
Features in the coalition used to construct composite points to estimate
feature importance.
coalition_size : int, default=n_features-1
Maximum number of features used during the construction of composite points.
sample_size : int, default=n_samples
Maximum number of samples used during the construction of composite points.

Returns
-------
array-like
Contributions for each sample from `samples1` against respective sample in
`samples2`
"""
contributions = parallel_loop(
lambda samples: self.pairwise(*samples, verbose=False, **kwargs),
Expand Down
Loading