From c12ec4ac3e7f2ca1a84285254adc15f5b42b1e88 Mon Sep 17 00:00:00 2001
From: janezd
Date: Wed, 13 Mar 2019 23:11:10 +0100
Subject: [PATCH] distances: pylint

---
 Orange/distance/base.py     | 47 +++++++++++++++++++++++++++++--------
 Orange/distance/distance.py | 37 +++++++++++++++++++----------
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/Orange/distance/base.py b/Orange/distance/base.py
index 83e2ae5cd90..517955a1138 100644
--- a/Orange/distance/base.py
+++ b/Orange/distance/base.py
@@ -1,3 +1,6 @@
+# This module defines abstract base classes; derived classes are abstract, too
+# pylint: disable=abstract-method
+
 import numpy as np
 import sklearn.metrics as skl_metrics
 
@@ -13,10 +16,11 @@
 # TODO this *private* function is called from several widgets to prepare
 # data for calling the below classes. After we (mostly) stopped relying
 # on sklearn.metrics, this is (mostly) unnecessary
-
+# Afterwards, also remove the following line:
+# pylint: disable=redefined-outer-name
 def _preprocess(table, impute=True):
     """Remove categorical attributes and impute missing values."""
-    if not len(table):
+    if not len(table):  # this can be an array, pylint: disable=len-as-condition
         return table
     new_domain = Domain(
         [a for a in table.domain.attributes if a.is_continuous],
@@ -120,6 +124,9 @@ class Distance:
         impute (bool):
             if `True` (default is `False`), nans in the computed distances
             are replaced with zeros, and infs with very large numbers.
+        normalize (bool):
+            if `True`, columns are normalized before computation. This attribute
+            applies only if the distance supports normalization.
 
     The capabilities of the metrics are described with class attributes.
 
@@ -150,6 +157,11 @@ class Distance:
     supports_normalization = False
     supports_missing = True
 
+    # Predefined here to silence pylint, which doesn't look into __new__
+    normalize = False
+    axis = 1
+    impute = False
+
     def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
         self = super().__new__(cls)
         self.axis = axis
@@ -168,16 +180,25 @@ def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
                 or hasattr(e1, "is_sparse") and e1.is_sparse()):
             fallback = getattr(self, "fallback", None)
             if fallback is not None:
-                # pylint disable=not-callable
+                # pylint: disable=not-callable
                 return fallback(e1, e2, axis, impute)
 
         # Magic constructor
         model = self.fit(e1)
         return model(e1, e2)
 
-    def fit(self, e1):
-        """Abstract method returning :obj:`DistanceModel` fit to the data"""
-        pass
+    def fit(self, data):
+        """
+        Abstract method returning :obj:`DistanceModel` fit to the data
+
+        Args:
+            data (Orange.data.Table, Orange.data.Instance, np.ndarray):
+                data for fitting the distance model
+
+        Returns:
+            model (DistanceModel)
+        """
+        raise NotImplementedError
 
     @staticmethod
     def check_no_discrete(n_vals):
@@ -256,7 +277,7 @@ def compute_distances(self, x1, x2):
         """
         Abstract method for computation of distances between rows or columns
         of `x1`, or between rows of `x1` and `x2`.
 
         Do not call directly."""
-        pass
+        raise NotImplementedError
 
 
 class FittedDistanceModel(DistanceModel):
@@ -268,10 +289,15 @@ class FittedDistanceModel(DistanceModel):
         attributes (list of `Variable`): attributes on which the model was fit
         discrete (np.ndarray): bool array indicating discrete attributes
         continuous (np.ndarray): bool array indicating continuous attributes
+        normalize (bool):
+            if `True` (default is `False`) continuous columns are normalized
     """
     def __init__(self, attributes, axis=1, impute=False):
         super().__init__(axis, impute)
         self.attributes = attributes
+        self.discrete = None
+        self.continuous = None
+        self.normalize = False
 
     def __call__(self, e1, e2=None):
         if self.attributes is not None and (
@@ -373,7 +399,7 @@ def fit_cols(self, attributes, x, n_vals):
             x (np.ndarray): data
             n_vals (np.ndarray): number of attribute values, 0 for continuous
         """
-        pass
+        raise NotImplementedError
 
     def fit_rows(self, attributes, x, n_vals):
         """
@@ -440,7 +466,8 @@ def fit_rows(self, attributes, x, n_vals):
             dist_missing2_cont[:curr_cont],
             dist_missing_disc, dist_missing2_disc)
 
-    def get_discrete_stats(self, column, n_bins):
+    @staticmethod
+    def get_discrete_stats(column, n_bins):
         """
         Return tables used computing distance between missing discrete values.
@@ -474,7 +501,7 @@ def get_continuous_stats(self, column):
             dist_missing2_cont (float): the value used for distance between
                 two missing values in column
         """
-        pass
+        raise NotImplementedError
 
 
 # Fallbacks for distances in sparse data
diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py
index 609f6b6f9b1..cfd7fe43af7 100644
--- a/Orange/distance/distance.py
+++ b/Orange/distance/distance.py
@@ -14,6 +14,7 @@
 from .base import (Distance, DistanceModel, FittedDistance,
                    FittedDistanceModel, SklDistance, _orange_to_numpy)
 
+
 class EuclideanRowsModel(FittedDistanceModel):
     """
     Model for computation of Euclidean distances between rows.
@@ -23,14 +24,14 @@ class EuclideanRowsModel(FittedDistanceModel):
     """
     def __init__(self, attributes, impute, normalize, continuous, discrete,
-                 means, vars, dist_missing2_cont,
+                 means, stdvars, dist_missing2_cont,
                  dist_missing_disc, dist_missing2_disc):
         super().__init__(attributes, 1, impute)
         self.normalize = normalize
         self.continuous = continuous
         self.discrete = discrete
         self.means = means
-        self.vars = vars
+        self.vars = stdvars
         self.dist_missing2_cont = dist_missing2_cont
         self.dist_missing_disc = dist_missing_disc
         self.dist_missing2_disc = dist_missing2_disc
@@ -91,11 +92,11 @@ class EuclideanColumnsModel(FittedDistanceModel):
     Means are used as offsets for normalization, and two deviations are used
     for scaling.
     """
-    def __init__(self, attributes, impute, normalize, means, vars):
+    def __init__(self, attributes, impute, normalize, means, stdvars):
         super().__init__(attributes, 0, impute)
         self.normalize = normalize
         self.means = means
-        self.vars = vars
+        self.vars = stdvars
 
     def compute_distances(self, x1, x2=None):
         """
@@ -134,6 +135,7 @@ class Euclidean(FittedDistance):
     rows_model_type = EuclideanRowsModel
 
     def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False):
+        # pylint: disable=arguments-differ
         return super().__new__(cls, e1, e2, axis, impute, normalize=normalize)
 
     def get_continuous_stats(self, column):
@@ -160,9 +162,8 @@ def fit_cols(self, attributes, x, n_vals):
         for normalization and imputation.
""" def nowarn(msg, cat, *args, **kwargs): - if cat is RuntimeWarning and ( - msg == "Mean of empty slice" - or msg == "Degrees of freedom <= 0 for slice"): + if cat is RuntimeWarning and msg in ( + "Mean of empty slice", "Degrees of freedom <= 0 for slice"): if self.normalize: raise ValueError("some columns have no defined values") else: @@ -174,11 +175,11 @@ def nowarn(msg, cat, *args, **kwargs): orig_warn = warnings.warn with patch("warnings.warn", new=nowarn): means = np.nanmean(x, axis=0) - vars = np.nanvar(x, axis=0) - if self.normalize and not vars.all(): + stdvars = np.nanvar(x, axis=0) + if self.normalize and not stdvars.all(): raise ValueError("some columns are constant") return EuclideanColumnsModel( - attributes, self.impute, self.normalize, means, vars) + attributes, self.impute, self.normalize, means, stdvars) class ManhattanRowsModel(FittedDistanceModel): @@ -270,6 +271,7 @@ class Manhattan(FittedDistance): rows_model_type = ManhattanRowsModel def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False): + # pylint: disable=arguments-differ return super().__new__(cls, e1, e2, axis, impute, normalize=normalize) def get_continuous_stats(self, column): @@ -337,6 +339,10 @@ def fit_rows(self, attributes, x, n_vals): fit_cols = fit_rows + def get_continuous_stats(self, column): + # Implement an unneeded abstract method to silence pylint + return None + class CosineModel(FittedDistanceModel): """Model for computation of cosine distances across rows and columns. All non-zero discrete values are treated as 1.""" @@ -402,6 +408,7 @@ def _compute_dense(self, x1, x2): compute distances between rows without missing values, and a slower loop for those with missing values. """ + # view is false positive, pylint: disable=no-member nonzeros1 = np.not_equal(x1, 0).view(np.int8) if self.axis == 1: nans1 = _distance.any_nan_row(x1) @@ -421,7 +428,8 @@ def _compute_dense(self, x1, x2): return _distance.jaccard_cols( nonzeros1, x1, nans1, self.ps) - def _compute_sparse(self, x1, x2=None): + @staticmethod + def _compute_sparse(x1, x2=None): symmetric = x2 is None if symmetric: x2 = x1 @@ -462,6 +470,10 @@ def fit_rows(self, attributes, x, n_vals): fit_cols = fit_rows + def get_continuous_stats(self, column): + # Implement an unneeded abstract method to silence pylint + return None + class CorrelationDistanceModel(DistanceModel): """Helper class for normal and absolute Pearson and Spearman correlation""" @@ -561,7 +573,7 @@ def _corrcoef2(a, b, axis=0): numpy.corrcoef """ a, b = np.atleast_2d(a, b) - if not (axis == 0 or axis == 1): + if axis not in (0, 1): raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis)) mean_a = np.mean(a, axis=axis, keepdims=True) @@ -597,6 +609,7 @@ def _corrcoef2(a, b, axis=0): class CorrelationDistance(Distance): + # pylint: disable=abstract-method supports_missing = False