Adjusting some comments and general linting stuff

NannyML · Jul 19, 2024 · c8409db · c8409db
1 parent 6100ab8
commit c8409db
Show file tree

Hide file tree

Showing 6 changed files with 43 additions and 21 deletions.
diff --git a/nannyml/data_quality/__init__.py b/nannyml/data_quality/__init__.py
@@ -7,4 +7,4 @@
 
 from .missing import MissingValuesCalculator
 from .unseen import UnseenValuesCalculator
-from .range import NumericalRangeCalculator
+from .range import NumericalRangeCalculator
diff --git a/nannyml/data_quality/range/calculator.py b/nannyml/data_quality/range/calculator.py
@@ -2,7 +2,7 @@
 #
 #  License: Apache Software License 2.0
 
-"""Continous numerical variable range monitor to ensure range supplied is within training bounds."""
+"""Continuous numerical variable range monitor to ensure range supplied is within training bounds."""
 
 from typing import Any, Dict, List, Optional, Union
 
@@ -15,7 +15,6 @@
 from nannyml.exceptions import InvalidArgumentsException
 from nannyml.thresholds import Threshold, calculate_threshold_values, ConstantThreshold
 from nannyml.usage_logging import UsageEvent, log_usage
-
 from .result import Result
 
 """
@@ -24,7 +23,7 @@
 
 
 class NumericalRangeCalculator(AbstractCalculator):
-    """NumericalRangeCalculator implementation to ensure inference data numerical ranges match training."""
+    """Calculate the number or fraction of numerical values falling outside the ranges seen in the reference data."""
 
     def __init__(
         self,
@@ -68,7 +67,8 @@ def __init__(
         --------
         >>> import nannyml as nml
         >>> reference_df, analysis_df, _ = nml.load_synthetic_car_price_dataset()
-        >>> feature_column_names = [col for col in reference_df.columns if col not in ['fuel','transmission','timestamp', 'y_pred', 'y_true']]
+        >>> feature_column_names = [col for col in reference_df.columns if col not in [
+        ...     'fuel','transmission','timestamp', 'y_pred', 'y_true']]
         >>> calc = nml.NumericalRangeCalculator(
         ...     column_names=feature_column_names,
         ...     timestamp_column_name='timestamp',
@@ -117,7 +117,7 @@ def __init__(
     def _calculate_out_of_range_stats(self, data: pd.Series, lower_bound: float, upper_bound: float):
         # to do make this calc out of range stats
         count_tot = data.shape[0]
-        count_out_of_range  = ((data < lower_bound) | (data > upper_bound)).sum()
+        count_out_of_range = ((data < lower_bound) | (data > upper_bound)).sum()
         if self.normalize:
             count_out_of_range = count_out_of_range / count_tot
         return count_out_of_range
@@ -138,7 +138,7 @@ def _fit(self, reference_data: pd.DataFrame, *args, **kwargs):
                 f"Specified columns_names for NumericalRangeCalculator must all be continuous.\n"
                 f"Categorical columns found:\n{categorical_column_names}"
             )
-        
+
         for col in self.column_names:
             self._reference_value_ranges[col] = [reference_data[col].min(), reference_data[col].max()]
 
@@ -212,7 +212,10 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st
 
     def _set_metric_thresholds(self, result_data: pd.DataFrame):
         for column_name in self.column_names:
-            self._lower_alert_thresholds[column_name], self._upper_alert_thresholds[column_name] = calculate_threshold_values(  # noqa: E501
+            (
+                self._lower_alert_thresholds[column_name],
+                self._upper_alert_thresholds[column_name],
+            ) = calculate_threshold_values(  # noqa: E501
                 threshold=self.threshold,
                 data=result_data.loc[:, (column_name, 'value')],
                 lower_threshold_value_limit=self.lower_threshold_value_limit,
@@ -227,11 +230,17 @@ def _populate_alert_thresholds(self, result_data: pd.DataFrame) -> pd.DataFrame:
             result_data[(column_name, 'alert')] = result_data.apply(
                 lambda row: True
                 if (
-                    row[(column_name, 'value')] > (
-                        np.inf if row[(column_name, 'upper_threshold')] is None else row[(column_name, 'upper_threshold')]  # noqa: E501
+                    row[(column_name, 'value')]
+                    > (
+                        np.inf
+                        if row[(column_name, 'upper_threshold')] is None
+                        else row[(column_name, 'upper_threshold')]  # noqa: E501
                     )
-                    or row[(column_name, 'value')] < (
-                        -np.inf if row[(column_name, 'lower_threshold')] is None else row[(column_name, 'lower_threshold')]  # noqa: E501
+                    or row[(column_name, 'value')]
+                    < (
+                        -np.inf
+                        if row[(column_name, 'lower_threshold')] is None
+                        else row[(column_name, 'lower_threshold')]  # noqa: E501
                     )
                 )
                 else False,

diff --git a/nannyml/data_quality/range/result.py b/nannyml/data_quality/range/result.py
@@ -75,7 +75,8 @@ def plot(
         --------
         >>> import nannyml as nml
         >>> reference, analysis, _ = nml.load_synthetic_car_price_dataset()
-        >>> column_names = [col for col in reference.columns if col not in ['car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
+        >>> column_names = [col for col in reference.columns if col not in [
+        ...     'car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
         >>> calc = nml.UnseenValuesCalculator(
         ...     column_names=column_names,
         ...     timestamp_column_name='timestamp',

diff --git a/nannyml/data_quality/unseen/calculator.py b/nannyml/data_quality/unseen/calculator.py
@@ -13,6 +13,7 @@
 
 from nannyml.base import AbstractCalculator, _list_missing, _split_features_by_type
 from nannyml.chunk import Chunker
+
 # from nannyml.data_quality.base import _add_alert_flag
 from nannyml.exceptions import InvalidArgumentsException
 from nannyml.thresholds import ConstantThreshold, Threshold, calculate_threshold_values
@@ -69,7 +70,8 @@ def __init__(
         --------
         >>> import nannyml as nml
         >>> reference, analysis, _ = nml.load_synthetic_car_price_dataset()
-        >>> column_names = [col for col in reference.columns if col not in ['car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
+        >>> column_names = [col for col in reference.columns if col not in [
+        ...     'car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
         >>> calc = nml.UnseenValuesCalculator(
         ...     column_names=column_names,
         ...     timestamp_column_name='timestamp',
@@ -217,7 +219,10 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st
 
     def _set_metric_thresholds(self, result_data: pd.DataFrame):
         for column_name in self.column_names:
-            self._lower_alert_thresholds[column_name], self._upper_alert_thresholds[column_name] = calculate_threshold_values(  # noqa: E501
+            (
+                self._lower_alert_thresholds[column_name],
+                self._upper_alert_thresholds[column_name],
+            ) = calculate_threshold_values(  # noqa: E501
                 threshold=self.threshold,
                 data=result_data.loc[:, (column_name, 'value')],
                 lower_threshold_value_limit=self.lower_threshold_value_limit,
@@ -232,11 +237,17 @@ def _populate_alert_thresholds(self, result_data: pd.DataFrame) -> pd.DataFrame:
             result_data[(column_name, 'alert')] = result_data.apply(
                 lambda row: True
                 if (
-                    row[(column_name, 'value')] > (
-                        np.inf if row[(column_name, 'upper_threshold')] is None else row[(column_name, 'upper_threshold')]  # noqa: E501
+                    row[(column_name, 'value')]
+                    > (
+                        np.inf
+                        if row[(column_name, 'upper_threshold')] is None
+                        else row[(column_name, 'upper_threshold')]  # noqa: E501
                     )
-                    or row[(column_name, 'value')] < (
-                        -np.inf if row[(column_name, 'lower_threshold')] is None else row[(column_name, 'lower_threshold')]  # noqa: E501
+                    or row[(column_name, 'value')]
+                    < (
+                        -np.inf
+                        if row[(column_name, 'lower_threshold')] is None
+                        else row[(column_name, 'lower_threshold')]  # noqa: E501
                     )
                 )
                 else False,

diff --git a/nannyml/data_quality/unseen/result.py b/nannyml/data_quality/unseen/result.py
@@ -75,7 +75,8 @@ def plot(
         --------
         >>> import nannyml as nml
         >>> reference, analysis, _ = nml.load_synthetic_car_price_dataset()
-        >>> column_names = [col for col in reference.columns if col not in ['car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
+        >>> column_names = [col for col in reference.columns if col not in [
+        ...     'car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
         >>> calc = nml.UnseenValuesCalculator(
         ...     column_names=column_names,
         ...     timestamp_column_name='timestamp',

diff --git a/nannyml/usage_logging.py b/nannyml/usage_logging.py
@@ -72,7 +72,7 @@ class UsageEvent(str, Enum):
 
     DQ_CALC_VALUES_OUT_OF_RANGE_FIT = "Data Quality Calculator Values Out Of Range fit"
     DQ_CALC_VALUES_OUT_OF_RANGE_RUN = "Data Quality Calculator Values Out Of Range run"
-    DQ_CALC_VALUES_OUT_OF_RANGE_PLOT = "Data Quality Calculator Values Out Of Range Plot"    
+    DQ_CALC_VALUES_OUT_OF_RANGE_PLOT = "Data Quality Calculator Values Out Of Range Plot"
 
     UNIVAR_DRIFT_CALC_FIT = "Univariate drift calculator fit"
     UNIVAR_DRIFT_CALC_RUN = "Univariate drift calculator run"