From c12ec4ac3e7f2ca1a84285254adc15f5b42b1e88 Mon Sep 17 00:00:00 2001
From: janezd
Date: Wed, 13 Mar 2019 23:11:10 +0100
Subject: [PATCH] distances: pylint

---
 Orange/distance/base.py     | 47 +++++++++++++++++++++++++++++--------
 Orange/distance/distance.py | 37 +++++++++++++++++++----------
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/Orange/distance/base.py b/Orange/distance/base.py
index 83e2ae5cd90..517955a1138 100644
--- a/Orange/distance/base.py
+++ b/Orange/distance/base.py
@@ -1,3 +1,6 @@
+# This module defines abstract base classes; derived classes are abstract, too
+# pylint: disable=abstract-method
+
 import numpy as np
 import sklearn.metrics as skl_metrics
 
@@ -13,10 +16,11 @@
 # TODO this *private* function is called from several widgets to prepare
 # data for calling the below classes. After we (mostly) stopped relying
 # on sklearn.metrics, this is (mostly) unnecessary
-
+# Afterwards, also remove the following line:
+# pylint: disable=redefined-outer-name
 def _preprocess(table, impute=True):
     """Remove categorical attributes and impute missing values."""
-    if not len(table):
+    if not len(table):  # this can be an array, pylint: disable=len-as-condition
         return table
     new_domain = Domain(
         [a for a in table.domain.attributes if a.is_continuous],
@@ -120,6 +124,9 @@ class Distance:
         impute (bool):
             if `True` (default is `False`), nans in the computed distances
             are replaced with zeros, and infs with very large numbers.
+        normalize (bool):
+            if `True`, columns are normalized before computation. This attribute
+            applies only if the distance supports normalization.
 
     The capabilities of the metrics are described with class attributes.
 
@@ -150,6 +157,11 @@ class Distance:
     supports_normalization = False
     supports_missing = True
 
+    # Predefined here to silence pylint, which doesn't look into __new__
+    normalize = False
+    axis = 1
+    impute = False
+
     def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
         self = super().__new__(cls)
         self.axis = axis
@@ -168,16 +180,25 @@ def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
                 or hasattr(e1, "is_sparse") and e1.is_sparse()):
             fallback = getattr(self, "fallback", None)
             if fallback is not None:
-                # pylint disable=not-callable
+                # pylint: disable=not-callable
                 return fallback(e1, e2, axis, impute)
 
         # Magic constructor
         model = self.fit(e1)
         return model(e1, e2)
 
-    def fit(self, e1):
-        """Abstract method returning :obj:`DistanceModel` fit to the data"""
-        pass
+    def fit(self, data):
+        """
+        Abstract method returning :obj:`DistanceModel` fit to the data
+
+        Args:
+            data (Orange.data.Table, Orange.data.Instance, np.ndarray):
+                data for fitting the distance model
+
+        Returns:
+            model (DistanceModel)
+        """
+        raise NotImplementedError
 
     @staticmethod
     def check_no_discrete(n_vals):
@@ -256,7 +277,7 @@ def compute_distances(self, x1, x2):
         """
         Abstract method for computation of distances between rows or columns
         of `x1`, or between rows of `x1` and `x2`.
 
         Do not call directly."""
-        pass
+        raise NotImplementedError
 
 
 class FittedDistanceModel(DistanceModel):
@@ -268,10 +289,15 @@ class FittedDistanceModel(DistanceModel):
         attributes (list of `Variable`): attributes on which the model was fit
         discrete (np.ndarray): bool array indicating discrete attributes
         continuous (np.ndarray): bool array indicating continuous attributes
+        normalize (bool):
+            if `True` (default is `False`) continuous columns are normalized
     """
     def __init__(self, attributes, axis=1, impute=False):
         super().__init__(axis, impute)
         self.attributes = attributes
+        self.discrete = None
+        self.continuous = None
+        self.normalize = False
 
     def __call__(self, e1, e2=None):
         if self.attributes is not None and (
@@ -373,7 +399,7 @@ def fit_cols(self, attributes, x, n_vals):
             x (np.ndarray): data
             n_vals (np.ndarray): number of attribute values, 0 for continuous
         """
-        pass
+        raise NotImplementedError
 
     def fit_rows(self, attributes, x, n_vals):
         """
@@ -440,7 +466,8 @@ def fit_rows(self, attributes, x, n_vals):
             dist_missing2_cont[:curr_cont],
             dist_missing_disc, dist_missing2_disc)
 
-    def get_discrete_stats(self, column, n_bins):
+    @staticmethod
+    def get_discrete_stats(column, n_bins):
         """
         Return tables used computing distance between missing discrete values.
@@ -474,7 +501,7 @@ def get_continuous_stats(self, column):
             dist_missing2_cont (float): the value used for distance between
                 two missing values in column
         """
-        pass
+        raise NotImplementedError
 
 
 # Fallbacks for distances in sparse data
diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py
index 609f6b6f9b1..cfd7fe43af7 100644
--- a/Orange/distance/distance.py
+++ b/Orange/distance/distance.py
@@ -14,6 +14,7 @@
 from .base import (Distance, DistanceModel, FittedDistance,
                    FittedDistanceModel, SklDistance, _orange_to_numpy)
 
+
 class EuclideanRowsModel(FittedDistanceModel):
     """
     Model for computation of Euclidean distances between rows.
@@ -23,14 +24,14 @@ class EuclideanRowsModel(FittedDistanceModel):
     """
     def __init__(self, attributes, impute, normalize, continuous, discrete,
-                 means, vars, dist_missing2_cont,
+                 means, stdvars, dist_missing2_cont,
                  dist_missing_disc, dist_missing2_disc):
         super().__init__(attributes, 1, impute)
         self.normalize = normalize
         self.continuous = continuous
         self.discrete = discrete
         self.means = means
-        self.vars = vars
+        self.vars = stdvars
         self.dist_missing2_cont = dist_missing2_cont
         self.dist_missing_disc = dist_missing_disc
         self.dist_missing2_disc = dist_missing2_disc
@@ -91,11 +92,11 @@ class EuclideanColumnsModel(FittedDistanceModel):
     Means are used as offsets for normalization, and two deviations are used
     for scaling.
     """
-    def __init__(self, attributes, impute, normalize, means, vars):
+    def __init__(self, attributes, impute, normalize, means, stdvars):
         super().__init__(attributes, 0, impute)
         self.normalize = normalize
         self.means = means
-        self.vars = vars
+        self.vars = stdvars
 
     def compute_distances(self, x1, x2=None):
         """
@@ -134,6 +135,7 @@ class Euclidean(FittedDistance):
     rows_model_type = EuclideanRowsModel
 
     def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False):
+        # pylint: disable=arguments-differ
         return super().__new__(cls, e1, e2, axis, impute, normalize=normalize)
 
     def get_continuous_stats(self, column):
@@ -160,9 +162,8 @@ def fit_cols(self, attributes, x, n_vals):
         for normalization and imputation.
""" def nowarn(msg, cat, *args, **kwargs): - if cat is RuntimeWarning and ( - msg == "Mean of empty slice" - or msg == "Degrees of freedom <= 0 for slice"): + if cat is RuntimeWarning and msg in ( + "Mean of empty slice", "Degrees of freedom <= 0 for slice"): if self.normalize: raise ValueError("some columns have no defined values") else: @@ -174,11 +175,11 @@ def nowarn(msg, cat, *args, **kwargs): orig_warn = warnings.warn with patch("warnings.warn", new=nowarn): means = np.nanmean(x, axis=0) - vars = np.nanvar(x, axis=0) - if self.normalize and not vars.all(): + stdvars = np.nanvar(x, axis=0) + if self.normalize and not stdvars.all(): raise ValueError("some columns are constant") return EuclideanColumnsModel( - attributes, self.impute, self.normalize, means, vars) + attributes, self.impute, self.normalize, means, stdvars) class ManhattanRowsModel(FittedDistanceModel): @@ -270,6 +271,7 @@ class Manhattan(FittedDistance): rows_model_type = ManhattanRowsModel def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False): + # pylint: disable=arguments-differ return super().__new__(cls, e1, e2, axis, impute, normalize=normalize) def get_continuous_stats(self, column): @@ -337,6 +339,10 @@ def fit_rows(self, attributes, x, n_vals): fit_cols = fit_rows + def get_continuous_stats(self, column): + # Implement an unneeded abstract method to silence pylint + return None + class CosineModel(FittedDistanceModel): """Model for computation of cosine distances across rows and columns. All non-zero discrete values are treated as 1.""" @@ -402,6 +408,7 @@ def _compute_dense(self, x1, x2): compute distances between rows without missing values, and a slower loop for those with missing values. """ + # view is false positive, pylint: disable=no-member nonzeros1 = np.not_equal(x1, 0).view(np.int8) if self.axis == 1: nans1 = _distance.any_nan_row(x1) @@ -421,7 +428,8 @@ def _compute_dense(self, x1, x2): return _distance.jaccard_cols( nonzeros1, x1, nans1, self.ps) - def _compute_sparse(self, x1, x2=None): + @staticmethod + def _compute_sparse(x1, x2=None): symmetric = x2 is None if symmetric: x2 = x1 @@ -462,6 +470,10 @@ def fit_rows(self, attributes, x, n_vals): fit_cols = fit_rows + def get_continuous_stats(self, column): + # Implement an unneeded abstract method to silence pylint + return None + class CorrelationDistanceModel(DistanceModel): """Helper class for normal and absolute Pearson and Spearman correlation""" @@ -561,7 +573,7 @@ def _corrcoef2(a, b, axis=0): numpy.corrcoef """ a, b = np.atleast_2d(a, b) - if not (axis == 0 or axis == 1): + if axis not in (0, 1): raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis)) mean_a = np.mean(a, axis=axis, keepdims=True) @@ -597,6 +609,7 @@ def _corrcoef2(a, b, axis=0): class CorrelationDistance(Distance): + # pylint: disable=abstract-method supports_missing = False