update project #1

Open
wants to merge 53 commits into base: main
Commits
73d2277
fixed incorrect column name from AvgTemperature to Temp
GreenGilad Mar 8, 2022
e689ef3
corrections to documentation of uni- and multi-variate gaussian classes
GreenGilad Mar 13, 2022
eb7ab41
minor corrections to chapter 1 and appendices
GreenGilad Mar 13, 2022
c3728c7
Book - corrections + chapter 4
GreenGilad Mar 23, 2022
d637f70
minor changes
GreenGilad Mar 23, 2022
bfc7971
adding IML challenge
GreenGilad Mar 23, 2022
2ebd6e3
fixed documentation and default values of split_train_test function
GreenGilad Mar 26, 2022
29d7198
minor corrections of typos and errors
GreenGilad Mar 26, 2022
eab9596
changes to gitignore - ignore __pycache__
GreenGilad Apr 2, 2022
6033208
corrections to documentation and files associated with classificatio…
GreenGilad Apr 2, 2022
b563875
corrected logistic init to be non-regularized
GreenGilad Apr 5, 2022
615e94d
adding soft-svm changing-lambda code example
GreenGilad Apr 9, 2022
4770d80
fixing calls to logistic regression to be non-regularized
GreenGilad Apr 9, 2022
264fb48
Adding chapter 5 (ensemble methods) + corrections of mistakes/typos
GreenGilad Apr 9, 2022
b5b6947
Adding chapter 4-5 to book + minor fixes
GreenGilad Apr 19, 2022
f3eb47f
fixing commenting out of classification chapter
GreenGilad Apr 19, 2022
1183da5
Added templates for adaboost exercise
GreenGilad Apr 28, 2022
3fa957a
Added templates for adaboost exercise
GreenGilad Apr 28, 2022
c87be5d
correction of minor bugs in adaboost and decision_stump files
GreenGilad Apr 30, 2022
072d4a3
Additions to IMLearn for exercises
GreenGilad May 12, 2022
29779cb
added unsupervised learning and kernel methods chapters
GreenGilad May 16, 2022
e76046c
Added motivational code examples for spectral clustering
GreenGilad May 16, 2022
554f3c4
updating chapter numbers + GD code examples
GreenGilad May 29, 2022
2a87aff
change SGD run parameters
GreenGilad May 30, 2022
acd2c40
added descent methods chapter + minor corrections
GreenGilad May 31, 2022
953e9ac
Added files for exercise 6 - gradient descent
GreenGilad Jun 9, 2022
0d6f29c
minor documentation corrections
GreenGilad Jun 11, 2022
cf32b2e
fixing documentation issues
GreenGilad Jun 14, 2022
acc097f
added neural networks chapter
GreenGilad Jun 14, 2022
1480b98
corrected warning in regularized module init
GreenGilad Jun 16, 2022
8a42f21
fixed bug in init of regularized module
GreenGilad Jun 17, 2022
7cb79fe
Template files for ex7 - SGD + neural network implementation
GreenGilad Jun 21, 2022
3e42061
added matplotlib to environment.yml
shahariel Nov 5, 2022
a8042d2
added matplotlib to environment.yml
shahariel Nov 5, 2022
16a4fd0
Creation of Ensemble Lab
shahariel Jan 18, 2023
7b22451
Added graph of loss as a function of ensemble's size
shahariel Jan 18, 2023
bc2491f
added save_fig to the graph of Loss vs. Trees Ensemble's size
shahariel Feb 1, 2023
bc25a10
update course book - general fixes
GreenGilad Mar 14, 2023
6b5edc2
added static method attribute to animation buttons utils
GreenGilad Mar 15, 2023
736b512
updated instructions of exercise 2 - practical part
GreenGilad Mar 29, 2023
683a62f
updated instructions of exercise 2 - practical part
GreenGilad Mar 29, 2023
4686d6f
corrections in PAC chapter
GreenGilad Apr 19, 2023
56be185
adaboost path
nomoriel Apr 27, 2023
64bf1db
extended adaboost doc
nomoriel Apr 27, 2023
239e0cb
ds doc
nomoriel Apr 27, 2023
0be1fac
decoupling regularizers ques from polynomials
nomoriel Apr 27, 2023
4c1259f
reviewing parts of PAC + minor corrections elsewhere
GreenGilad May 11, 2023
d31cb3e
Merge pull request #34 from nomoriel/correcting
GreenGilad May 16, 2023
4ea97d2
corrected documentation error in logistic regression - self.solver_ i…
GreenGilad Jun 29, 2023
95bfc71
Merge branch 'main' of https://github.com/GreenGilad/IML.HUJI into main
GreenGilad Jul 3, 2023
d5d676a
updated and corrected course book
GreenGilad Jul 3, 2023
6a627af
Update README.md
GreenGilad Aug 19, 2024
101375f
Update README.md
GreenGilad Aug 19, 2024
5 changes: 2 additions & 3 deletions .gitignore
@@ -5,7 +5,6 @@ code\ examples/*.gif
*.gif
figures/*.png
.idea
__pycache__
lab/.ipynb_checkpoints/
code\ examples/.ipynb_checkpoints/
**__pycache__
**.ipynb_checkpoints

4 changes: 3 additions & 1 deletion IMLearn/__init__.py
@@ -3,4 +3,6 @@
__all__ = ["BaseEstimator",
"BaseDimReducer",
"learners",
"metrics"]
"metrics",
"model_selection",
"utils"]
4 changes: 2 additions & 2 deletions IMLearn/base/__init__.py
@@ -1,6 +1,6 @@
from .base_estimator import BaseEstimator
from .base_module import BaseModule
from .base_learning_rate import BaseLearningRate
from .base_learning_rate import BaseLR


__all__ = ["BaseEstimator", "BaseModule", "BaseLearningRate"]
__all__ = ["BaseEstimator", "BaseModule", "BaseLR"]
2 changes: 1 addition & 1 deletion IMLearn/base/base_learning_rate.py
@@ -1,7 +1,7 @@
from abc import ABC


class BaseLearningRate(ABC):
class BaseLR(ABC):
"""
Base class of learning rates (step size) strategies to be used in different descent methods
"""
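The remainder of the class body is collapsed in this diff. Judging from how FixedLR and ExponentialLR override lr_step later in this pull request, the renamed base class presumably only declares that hook; a minimal sketch under that assumption:

from abc import ABC, abstractmethod


class BaseLR(ABC):
    """
    Base class of learning rate (step size) strategies to be used in different descent methods
    """

    @abstractmethod
    def lr_step(self, **lr_kwargs) -> float:
        # Return the learning rate (eta) to use at the current iteration
        raise NotImplementedError()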
33 changes: 16 additions & 17 deletions IMLearn/base/base_module.py
@@ -1,33 +1,35 @@
from __future__ import annotations
from abc import ABC
import numpy as np


class BaseModule(ABC):
"""
Base class representing a function to be optimized in a descent method algorithm or a neural network
Base class representing a function to be optimized in a descent method algorithm

Attributes
----------
weights_ : ndarray of shape (n_in, n_out)
Parameters of function with respect to which the function is optimized.
"""

def __init__(self):
def __init__(self, weights: np.ndarray = None) -> BaseModule:
"""
Initialize a module instance

Attributes
Parameters:
----------
_weights : ndarray of shape (n_in, n_out)
Parameters of function with respect to which the function is optimized.
Set by ``self.compute_output`` function
weights: np.ndarray, default None
Initial value of weights
"""
self._weights = None
self.weights_ = weights

def compute_output(self, input: np.ndarray, **kwargs) -> np.ndarray:
def compute_output(self, **kwargs) -> np.ndarray:
"""
Compute the output value of the function

Parameters
----------
input: ndarray of shape (n_in,)
Input value to evaluate function at

kwargs: Additional arguments to be passed and used by derived objects

Returns
@@ -41,15 +43,12 @@ def compute_output(self, input: np.ndarray, **kwargs) -> np.ndarray:
"""
raise NotImplementedError()

def compute_jacobian(self, input: np.ndarray, **kwargs) -> np.ndarray:
def compute_jacobian(self, **kwargs) -> np.ndarray:
"""
Compute the derivative of the function with respect to each of its parameters

Parameters
----------
input: ndarray of shape (n_in,)
Input value to evaluate function derivative at

kwargs: Additional arguments to be passed and used by derived objects

Returns
@@ -73,7 +72,7 @@ def weights(self):
-------
weights: ndarray of shape (n_in, n_out)
"""
return self._weights
return self.weights_

@weights.setter
def weights(self, weights: np.ndarray) -> None:
@@ -84,7 +83,7 @@ def weights(self, weights: np.ndarray) -> None:
----------
weights: ndarray array of shape (n_in, n_out)
"""
self._weights = weights
self.weights_ = weights

@property
def shape(self):
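For context on the revised BaseModule interface (weights optionally passed at construction, keyword-only compute_output and compute_jacobian), a minimal illustrative subclass is sketched below. The class name SquaredL2 and the example values are assumptions for illustration, not part of this diff:

import numpy as np

from IMLearn.base import BaseModule


class SquaredL2(BaseModule):
    """Illustrative module: f(w) = ||w||^2, optimized over its own weights"""

    def compute_output(self, **kwargs) -> np.ndarray:
        # Objective value at the current weights
        return np.sum(self.weights ** 2)

    def compute_jacobian(self, **kwargs) -> np.ndarray:
        # Gradient of ||w||^2 with respect to the weights
        return 2 * self.weights


module = SquaredL2(weights=np.array([1.0, -2.0, 0.5]))
print(module.compute_output())    # 5.25
print(module.compute_jacobian())  # [ 2. -4.  1.]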
5 changes: 3 additions & 2 deletions IMLearn/desent_methods/__init__.py
@@ -1,4 +1,5 @@
from .gradient_descent import GradientDescent
from .learning_rate import FixedLR, ExponentialLR, AdaptiveLR
from .stochastic_gradient_descent import StochasticGradientDescent
from .learning_rate import FixedLR, ExponentialLR

__all__ = ["GradientDescent", "FixedLR", "ExponentialLR", "AdaptiveLR"]
__all__ = ["GradientDescent", "StochasticGradientDescent", "FixedLR", "ExponentialLR"]
117 changes: 110 additions & 7 deletions IMLearn/desent_methods/gradient_descent.py
@@ -1,19 +1,122 @@
from IMLearn.base import BaseModule, BaseLearningRate
from .learning_rate import FixedLR
from __future__ import annotations
from typing import Callable, NoReturn
import numpy as np

from IMLearn.base import BaseModule, BaseLR
from .learning_rate import FixedLR

OUTPUT_VECTOR_TYPE = ["last", "best", "average"]


def default_callback(**kwargs) -> NoReturn:
pass


class GradientDescent:
"""
Gradient Descent algorithm

Gradient descent algorithm for minimizing convex functions
Attributes:
-----------
learning_rate_: BaseLR
Learning rate strategy for retrieving the learning rate at each iteration t of the algorithm

tol_: float
The stopping criterion. Training stops when the Euclidean norm of w^(t)-w^(t-1) is less than
specified tolerance

max_iter_: int
The maximum number of GD iterations to be performed before stopping training

out_type_: str
Type of returned solution:
- `last`: returns the point reached at the last GD iteration
- `best`: returns the point achieving the lowest objective
- `average`: returns the average point over the GD iterations

callback_: Callable[[...], None], default=default_callback
A callable function to be called after each update of the model while fitting to given data.
Callable function receives as input any argument relevant for the current GD iteration. Arguments
are specified in the `GradientDescent.fit` function
"""
def __init__(self, learning_rate: BaseLearningRate = FixedLR(1e-3), tol: float = 1e-4, max_iter: int = 1000,
out_type: str = "last", batch_size=None):
raise NotImplementedError()
def __init__(self,
learning_rate: BaseLR = FixedLR(1e-3),
tol: float = 1e-5,
max_iter: int = 1000,
out_type: str = "last",
callback: Callable[[GradientDescent, ...], None] = default_callback):
"""
Instantiate a new instance of the GradientDescent class

Parameters
----------
learning_rate: BaseLR, default=FixedLR(1e-3)
Learning rate strategy for retrieving the learning rate at each iteration t of the algorithm

tol: float, default=1e-5
The stopping criterion. Training stops when the Euclidean norm of w^(t)-w^(t-1) is less than
specified tolerance

max_iter: int, default=1000
The maximum number of GD iterations to be performed before stopping training

out_type: str, default="last"
Type of returned solution. Supported types are specified in class attributes

callback: Callable[[...], None], default=default_callback
A callable function to be called after each update of the model while fitting to given data.
Callable function receives as input any argument relevant for the current GD iteration. Arguments
are specified in the `GradientDescent.fit` function
"""
self.learning_rate_ = learning_rate
if out_type not in OUTPUT_VECTOR_TYPE:
raise ValueError("output_type not supported")
self.out_type_ = out_type
self.tol_ = tol
self.max_iter_ = max_iter
self.callback_ = callback

def fit(self, f: BaseModule, X: np.ndarray, y: np.ndarray):
"""
Optimize module using Gradient Descent iterations over given input samples and responses

Parameters
----------
f : BaseModule
Module of objective to optimize using GD iterations
X : ndarray of shape (n_samples, n_features)
Input data to optimize module over
y : ndarray of shape (n_samples, )
Responses of input data to optimize module over

Returns
-------
solution: ndarray of shape (n_features)
Obtained solution for module optimization, according to the specified self.out_type_

Notes
-----
- Optimization is performed as long as self.max_iter_ has not been reached and that
Euclidean norm of w^(t)-w^(t-1) is more than the specified self.tol_

- At each iteration the learning rate is specified according to self.learning_rate_.lr_step

- At the end of each iteration the self.callback_ function is called passing self and the
following named arguments:
- solver: GradientDescent
self, the current instance of GradientDescent
- weights: ndarray of shape specified by module's weights
Current weights of objective
- val: ndarray of shape specified by module's compute_output function
Value of objective function at current point, over given data X, y
- grad: ndarray of shape specified by module's compute_jacobian function
Module's jacobian with respect to the weights and at current point, over given data X,y
- t: int
Current GD iteration
- eta: float
Learning rate used at current iteration
- delta: float
Euclidean norm of w^(t)-w^(t-1)

def fit(self, f: BaseModule, X, y):
"""
raise NotImplementedError()
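The fit body is left as raise NotImplementedError() in this template. For reference, below is a standalone sketch of the documented contract only: iterate while t <= max_iter and the Euclidean norm of w^(t)-w^(t-1) is at least tol, query lr_step at each iteration, invoke the callback with the named arguments listed above, and return the last/best/average point. The helper name, the weight initialization convention, and the keyword arguments passed to the module are assumptions for illustration, not the exercise solution.

import numpy as np


def gradient_descent_sketch(f, lr, X=None, y=None, tol=1e-5, max_iter=1000,
                            out_type="last", callback=lambda **kwargs: None):
    # Assumes f exposes weights, compute_output(**kwargs) and compute_jacobian(**kwargs),
    # and that the caller already initialized f.weights (e.g. the SquaredL2 sketch above)
    weights = np.asarray(f.weights, dtype=float)
    best, best_val = weights, np.inf
    total, t = np.zeros_like(weights), 0

    for t in range(1, max_iter + 1):
        eta = lr.lr_step(t=t)                   # learning rate for iteration t
        grad = f.compute_jacobian(X=X, y=y)     # jacobian at the current point
        new_weights = weights - eta * grad
        delta = float(np.linalg.norm(new_weights - weights))

        weights = new_weights
        f.weights = new_weights                 # keep the module at the current point
        val = f.compute_output(X=X, y=y)
        total += new_weights
        if val < best_val:
            best, best_val = new_weights, val

        # The class-based version would also pass solver=self here
        callback(weights=new_weights, val=val, grad=grad, t=t, eta=eta, delta=delta)
        if delta < tol:                         # stop once ||w^(t)-w^(t-1)|| < tol
            break

    return {"last": weights, "best": best, "average": total / max(t, 1)}[out_type]

With a fixed step of 0.1 on the SquaredL2 example, each iteration scales the weights by 0.8, so the sketch converges toward the zero vector.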
63 changes: 52 additions & 11 deletions IMLearn/desent_methods/learning_rate.py
@@ -1,30 +1,71 @@
import numpy as np
from IMLearn.base import BaseModule, BaseLearningRate

from IMLearn.base import BaseModule, BaseLR

class FixedLR(BaseLearningRate):

class FixedLR(BaseLR):
"""
Class representing a fixed learning rate
"""
def __init__(self, base_lr: float):
"""
Instantiate a fixed learning-rate object

Parameters:
-----------
base_lr: float
Learning rate value to be returned at each call
"""
super().__init__()
self.base_lr = base_lr

def lr_step(self, **lr_kwargs) -> float:
return self.base_lr
"""
Specify learning rate at call

Returns:
--------
eta: float
Fixed learning rate specified when initializing instance

class AdaptiveLR(BaseLearningRate):
def __init__(self, alpha: float, beta: float):
super().__init__()
self.alpha = alpha
self.beta = beta

def lr_step(self, f: BaseModule, x: np.ndarray, dx: np.ndarray, **lr_kwargs):
Note:
-----
No arguments are expected
"""
raise NotImplementedError()


class ExponentialLR(FixedLR):
"""
Class representing an exponentially decaying learning rate
"""
def __init__(self, base_lr: float, decay_rate: float):
"""
Instantiate an exponentially decaying learning-rate object, i.e. eta_t = eta*gamma^t

Parameters:
----------
base_lr: float
Learning rate to be returned at t=0 (i.e. eta)

decay_rate: float
Decay rate of learning-rate (i.e. gamma)
"""
super().__init__(base_lr)
self.decay_rate = decay_rate

def lr_step(self, iter, **lr_kwargs) -> float:
def lr_step(self, t: int, **lr_kwargs) -> float:
"""
Specify learning rate at call `t`

Parameters:
-----------
t: int
Step time for which to calculate learning rate

Returns:
--------
eta_t: float
Exponential decay according to eta_t = eta*gamma^t
"""
raise NotImplementedError()
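Both lr_step bodies above are template stubs. For reference, a sketch of what the documented behavior implies (a constant eta for FixedLR; eta_t = eta * gamma^t for ExponentialLR), written as standalone classes so as not to presuppose the exercise solution:

class FixedLRSketch:
    """Fixed learning rate: the same base_lr is returned at every call"""

    def __init__(self, base_lr: float):
        self.base_lr = base_lr

    def lr_step(self, **lr_kwargs) -> float:
        return self.base_lr


class ExponentialLRSketch(FixedLRSketch):
    """Exponentially decaying learning rate: eta_t = eta * gamma^t"""

    def __init__(self, base_lr: float, decay_rate: float):
        super().__init__(base_lr)
        self.decay_rate = decay_rate

    def lr_step(self, t: int, **lr_kwargs) -> float:
        return self.base_lr * (self.decay_rate ** t)


lr = ExponentialLRSketch(base_lr=0.1, decay_rate=0.9)
print([round(lr.lr_step(t=t), 4) for t in range(3)])  # [0.1, 0.09, 0.081]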